1 /** 2 Numeric related utilities used by TSV Utilities. 3 4 Utilities in this file: 5 $(LIST 6 * [formatNumber] - An alternate print format for numbers, especially useful when 7 doubles are being used to represent integer and float values. 8 9 * [rangeMedian] - Finds the median value of a range. 10 11 * [quantile] - Generates quantile values for a data set. 12 ) 13 14 Copyright (c) 2016-2021, eBay Inc. 15 Initially written by Jon Degenhardt 16 17 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt) 18 */ 19 20 module tsv_utils.common.numerics; 21 22 import std.traits : isFloatingPoint, isIntegral, Unqual; 23 24 /** 25 formatNumber is an alternate way to print numbers. It is especially useful when 26 representing both integral and floating point values with float point data types. 27 28 formatNumber was created for tsv-summarize, where all calculations are done as doubles, 29 but may be integers by nature. In addition, output may be either for human consumption 30 or for additional machine processing. Integers are printed normally. Floating point is 31 printed as follows: 32 $(LIST 33 * Values that are exact integral values are printed as integers, as long as they 34 are within the range of where all integers are represented exactly by the floating 35 point type. The practical effect is to avoid switching to exponential notion. 36 37 * If the specified floatPrecision is between 0 and readablePrecisionMax, then 38 floatPrecision is used to set the significant digits following the decimal point. 39 Otherwise, it is used to set total significant digits. This does not apply to 40 really large numbers, for doubles, those larger than 2^53. Trailing zeros are 41 chopped in all cases. 42 ) 43 */ 44 auto formatNumber(T, size_t readablePrecisionMax = 6)(T num, const size_t floatPrecision = 12) 45 if (isFloatingPoint!T || isIntegral!T) 46 { 47 alias UT = Unqual!T; 48 49 import std.conv : to; 50 import std.format : format; 51 52 static if (isIntegral!T) 53 { 54 return format("%d", num); // The easy case. 55 } 56 else 57 { 58 static assert(isFloatingPoint!T); 59 60 static if (!is(UT == float) && !is(UT == double)) 61 { 62 /* Not a double or float, but a floating point. Punt on refinements. */ 63 return format("%.*g", floatPrecision, num); 64 } 65 else 66 { 67 static assert(is(UT == float) || is(UT == double)); 68 69 if (floatPrecision <= readablePrecisionMax) 70 { 71 /* Print with a fixed precision beyond the decimal point (%.*f), but 72 * remove trailing zeros. Notes: 73 * - This handles integer values stored in floating point types. 74 * - Values like NaN and infinity also handled. 75 */ 76 immutable str = format("%.*f", floatPrecision, num); 77 size_t trimToLength = str.length; 78 79 if (floatPrecision != 0 && str.length > floatPrecision + 1) 80 { 81 import std.ascii : isDigit; 82 assert(str.length - floatPrecision - 1 > 0); 83 size_t decimalIndex = str.length - floatPrecision - 1; 84 85 if (str[decimalIndex] == '.' && str[decimalIndex - 1].isDigit) 86 { 87 size_t lastNonZeroDigit = str.length - 1; 88 assert(decimalIndex < lastNonZeroDigit); 89 while (str[lastNonZeroDigit] == '0') lastNonZeroDigit--; 90 trimToLength = (decimalIndex < lastNonZeroDigit) 91 ? lastNonZeroDigit + 1 92 : decimalIndex; 93 } 94 } 95 96 return str[0 .. trimToLength]; 97 } 98 else 99 { 100 /* Determine if the number is subject to special integer value printing. 101 * Goal is to avoid exponential notion for integer values that '%.*g' 102 * generates. Numbers within the significant digit range of floatPrecision 103 * will print as desired with '%.*g', whether there is a fractional part 104 * or not. The '%.*g' format, with exponential notation, is also used for 105 * really large numbers. "Really large" being numbers outside the range 106 * of integers exactly representable by the floating point type. 107 */ 108 109 enum UT maxConsecutiveUTInteger = 2.0^^UT.mant_dig; 110 enum bool maxUTIntFitsInLong = (maxConsecutiveUTInteger <= long.max); 111 112 import std.math : fabs; 113 immutable UT absNum = num.fabs; 114 115 if (!maxUTIntFitsInLong || 116 absNum < 10.0^^floatPrecision || 117 absNum > maxConsecutiveUTInteger) 118 { 119 /* Within signficant digits range or very large. */ 120 return format("%.*g", floatPrecision, num); 121 } 122 else 123 { 124 /* Check for integral values needing to be printed in decimal format. 125 * modf/modff are used to determine if the value has a non-zero 126 * fractional component. 127 */ 128 import core.stdc.math : modf, modff; 129 130 static if (is(UT == float)) alias modfUT = modff; 131 else static if (is(UT == double)) alias modfUT = modf; 132 else static assert(0); 133 134 UT integerPart; 135 136 if (modfUT(num, &integerPart) == 0.0) return format("%d", num.to!long); 137 else return format("%.*g", floatPrecision, num); 138 } 139 } 140 } 141 } 142 assert(0); 143 } 144 145 unittest // formatNumber unit tests 146 { 147 import std.conv : to; 148 import std.format : format; 149 150 /* Integers */ 151 assert(formatNumber(0) == "0"); 152 assert(formatNumber(1) == "1"); 153 assert(formatNumber(-1) == "-1"); 154 assert(formatNumber(999) == "999"); 155 assert(formatNumber(12345678912345) == "12345678912345"); 156 assert(formatNumber(-12345678912345) == "-12345678912345"); 157 158 size_t a1 = 10; assert(a1.formatNumber == "10"); 159 const int a2 = -33234; assert(a2.formatNumber == "-33234"); 160 immutable long a3 = -12345678912345; assert(a3.formatNumber == "-12345678912345"); 161 162 // Specifying precision should never matter for integer values. 163 assert(formatNumber(0, 0) == "0"); 164 assert(formatNumber(1, 0) == "1"); 165 assert(formatNumber(-1, 0) == "-1"); 166 assert(formatNumber(999, 0) == "999"); 167 assert(formatNumber(12345678912345, 0) == "12345678912345"); 168 assert(formatNumber(-12345678912345, 0) == "-12345678912345"); 169 170 assert(formatNumber(0, 3) == "0"); 171 assert(formatNumber(1, 3) == "1"); 172 assert(formatNumber(-1, 3 ) == "-1"); 173 assert(formatNumber(999, 3) == "999"); 174 assert(formatNumber(12345678912345, 3) == "12345678912345"); 175 assert(formatNumber(-12345678912345, 3) == "-12345678912345"); 176 177 assert(formatNumber(0, 9) == "0"); 178 assert(formatNumber(1, 9) == "1"); 179 assert(formatNumber(-1, 9 ) == "-1"); 180 assert(formatNumber(999, 9) == "999"); 181 assert(formatNumber(12345678912345, 9) == "12345678912345"); 182 assert(formatNumber(-12345678912345, 9) == "-12345678912345"); 183 184 /* Doubles */ 185 assert(formatNumber(0.0) == "0"); 186 assert(formatNumber(0.2) == "0.2"); 187 assert(formatNumber(0.123412, 0) == "0"); 188 assert(formatNumber(0.123412, 1) == "0.1"); 189 assert(formatNumber(0.123412, 2) == "0.12"); 190 assert(formatNumber(0.123412, 5) == "0.12341"); 191 assert(formatNumber(0.123412, 6) == "0.123412"); 192 assert(formatNumber(0.123412, 7) == "0.123412"); 193 assert(formatNumber(9.123412, 5) == "9.12341"); 194 assert(formatNumber(9.123412, 6) == "9.123412"); 195 assert(formatNumber(99.123412, 5) == "99.12341"); 196 assert(formatNumber(99.123412, 6) == "99.123412"); 197 assert(formatNumber(99.123412, 7) == "99.12341"); 198 assert(formatNumber(999.123412, 0) == "999"); 199 assert(formatNumber(999.123412, 1) == "999.1"); 200 assert(formatNumber(999.123412, 2) == "999.12"); 201 assert(formatNumber(999.123412, 3) == "999.123"); 202 assert(formatNumber(999.123412, 4) == "999.1234"); 203 assert(formatNumber(999.123412, 5) == "999.12341"); 204 assert(formatNumber(999.123412, 6) == "999.123412"); 205 assert(formatNumber(999.123412, 7) == "999.1234"); 206 assert(formatNumber!(double, 9)(999.12341234, 7) == "999.1234123"); 207 assert(formatNumber(9001.0) == "9001"); 208 assert(formatNumber(1234567891234.0) == "1234567891234"); 209 assert(formatNumber(1234567891234.0, 0) == "1234567891234"); 210 assert(formatNumber(1234567891234.0, 1) == "1234567891234"); 211 212 /* Test round off cases. Note: These tests will not pass on Windows 32-bit builds. 213 * Use the -m64 flag on Windows to get a 64-bit build. 214 * 215 * Note: The first test case does not generate correct results on Windows 64-bit 216 * builds prior to DMD 2.096. In DMD 2.096 the code was moved into Phobos rather 217 * than being forwared to the C library. Both formatNumber and the underlying 218 * format call are included. See: https://github.com/dlang/phobos/pull/7757. 219 */ 220 version(Windows) 221 { 222 static if (__VERSION__ <= 2095) 223 { 224 // Incorrect 225 assert(format("%.*f", 0, 0.6) == "0"); 226 assert(formatNumber(0.6, 0) == "0"); 227 } 228 else 229 { 230 assert(format("%.*f", 0, 0.6) == "1"); 231 assert(formatNumber(0.6, 0) == "1"); 232 } 233 } 234 else 235 { 236 assert(format("%.*f", 0, 0.6) == "1"); 237 assert(formatNumber(0.6, 0) == "1"); 238 } 239 240 assert(formatNumber(0.6, 1) == "0.6"); 241 assert(formatNumber(0.06, 0) == "0"); 242 assert(formatNumber(0.06, 1) == "0.1"); 243 assert(formatNumber(0.06, 2) == "0.06"); 244 assert(formatNumber(0.06, 3) == "0.06"); 245 assert(formatNumber(9.49999, 0) == "9"); 246 assert(formatNumber(9.49999, 1) == "9.5"); 247 assert(formatNumber(9.6, 0) == "10"); 248 assert(formatNumber(9.6, 1) == "9.6"); 249 assert(formatNumber(99.99, 0) == "100"); 250 assert(formatNumber(99.99, 1) == "100"); 251 assert(formatNumber(99.99, 2) == "99.99"); 252 assert(formatNumber(9999.9996, 3) == "10000"); 253 assert(formatNumber(9999.9996, 4) == "9999.9996"); 254 assert(formatNumber(99999.99996, 4) == "100000"); 255 assert(formatNumber(99999.99996, 5) == "99999.99996"); 256 assert(formatNumber(999999.999996, 5) == "1000000"); 257 assert(formatNumber(999999.999996, 6) == "999999.999996"); 258 259 /* Turn off precision, the 'human readable' style. 260 * Note: Remains o if both are zero (first test). If it becomes desirable to support 261 * turning it off when for the precision equal zero case the simple extension is to 262 * allow the 'human readable' precision template parameter to be negative. 263 */ 264 assert(formatNumber!(double, 0)(999.123412, 0) == "999"); 265 assert(formatNumber!(double, 0)(999.123412, 1) == "1e+03"); 266 assert(formatNumber!(double, 0)(999.123412, 2) == "1e+03"); 267 assert(formatNumber!(double, 0)(999.123412, 3) == "999"); 268 assert(formatNumber!(double, 0)(999.123412, 4) == "999.1"); 269 270 // Default number printing 271 assert(formatNumber(1.2) == "1.2"); 272 assert(formatNumber(12.3) == "12.3"); 273 assert(formatNumber(12.34) == "12.34"); 274 assert(formatNumber(123.45) == "123.45"); 275 assert(formatNumber(123.456) == "123.456"); 276 assert(formatNumber(1234.567) == "1234.567"); 277 assert(formatNumber(1234.5678) == "1234.5678"); 278 assert(formatNumber(12345.6789) == "12345.6789"); 279 assert(formatNumber(12345.67891) == "12345.67891"); 280 assert(formatNumber(123456.78912) == "123456.78912"); 281 assert(formatNumber(123456.789123) == "123456.789123"); 282 assert(formatNumber(1234567.891234) == "1234567.89123"); 283 assert(formatNumber(12345678.912345) == "12345678.9123"); 284 assert(formatNumber(123456789.12345) == "123456789.123"); 285 assert(formatNumber(1234567891.2345) == "1234567891.23"); 286 assert(formatNumber(12345678912.345) == "12345678912.3"); 287 assert(formatNumber(123456789123.45) == "123456789123"); 288 assert(formatNumber(1234567891234.5) == "1.23456789123e+12"); 289 assert(formatNumber(12345678912345.6) == "1.23456789123e+13"); 290 assert(formatNumber(123456789123456.0) == "123456789123456"); 291 assert(formatNumber(0.3) == "0.3"); 292 assert(formatNumber(0.03) == "0.03"); 293 assert(formatNumber(0.003) == "0.003"); 294 assert(formatNumber(0.0003) == "0.0003"); 295 assert(formatNumber(0.00003) == "3e-05" || formatNumber(0.00003) == "3e-5"); 296 assert(formatNumber(0.000003) == "3e-06" || formatNumber(0.000003) == "3e-6"); 297 assert(formatNumber(0.0000003) == "3e-07" || formatNumber(0.0000003) == "3e-7"); 298 299 // Large number inside and outside the contiguous integer representation range 300 double dlarge = 2.0^^(double.mant_dig - 2) - 10.0; 301 double dhuge = 2.0^^(double.mant_dig + 1) + 1000.0; 302 303 assert(dlarge.formatNumber == format("%d", dlarge.to!long)); 304 assert(dhuge.formatNumber!(double) == format("%.12g", dhuge)); 305 306 // Negative values - Repeat most of above tests. 307 assert(formatNumber(-0.0) == "-0" || formatNumber(-0.0) == "0"); 308 assert(formatNumber(-0.2) == "-0.2"); 309 assert(formatNumber(-0.123412, 0) == "-0"); 310 assert(formatNumber(-0.123412, 1) == "-0.1"); 311 assert(formatNumber(-0.123412, 2) == "-0.12"); 312 assert(formatNumber(-0.123412, 5) == "-0.12341"); 313 assert(formatNumber(-0.123412, 6) == "-0.123412"); 314 assert(formatNumber(-0.123412, 7) == "-0.123412"); 315 assert(formatNumber(-9.123412, 5) == "-9.12341"); 316 assert(formatNumber(-9.123412, 6) == "-9.123412"); 317 assert(formatNumber(-99.123412, 5) == "-99.12341"); 318 assert(formatNumber(-99.123412, 6) == "-99.123412"); 319 assert(formatNumber(-99.123412, 7) == "-99.12341"); 320 assert(formatNumber(-999.123412, 0) == "-999"); 321 assert(formatNumber(-999.123412, 1) == "-999.1"); 322 assert(formatNumber(-999.123412, 2) == "-999.12"); 323 assert(formatNumber(-999.123412, 3) == "-999.123"); 324 assert(formatNumber(-999.123412, 4) == "-999.1234"); 325 assert(formatNumber(-999.123412, 5) == "-999.12341"); 326 assert(formatNumber(-999.123412, 6) == "-999.123412"); 327 assert(formatNumber(-999.123412, 7) == "-999.1234"); 328 assert(formatNumber!(double, 9)(-999.12341234, 7) == "-999.1234123"); 329 assert(formatNumber(-9001.0) == "-9001"); 330 assert(formatNumber(-1234567891234.0) == "-1234567891234"); 331 assert(formatNumber(-1234567891234.0, 0) == "-1234567891234"); 332 assert(formatNumber(-1234567891234.0, 1) == "-1234567891234"); 333 334 /* Test round off cases with negative numbers. Note: These tests will not pass 335 * on Windows 32-bit builds. Use the -m64 flag on Windows to get a 64-bit build. 336 * 337 * Note: The first test case does not generate correct results on Windows 64-bit 338 * builds prior to DMD 2.096. In DMD 2.096 the code was moved into Phobos rather 339 * than being forwared to the C library. Both formatNumber and the underlying 340 * format call are included. See: https://github.com/dlang/phobos/pull/7757. 341 */ 342 version(Windows) 343 { 344 static if (__VERSION__ <= 2095) 345 { 346 // Incorrect 347 assert(format("%.*f", 0, -0.6) == "-0"); 348 assert(formatNumber(-0.6, 0) == "-0"); 349 } 350 } 351 else 352 { 353 assert(format("%.*f", 0, -0.6) == "-1"); 354 assert(formatNumber(-0.6, 0) == "-1"); 355 } 356 357 assert(formatNumber(-0.6, 1) == "-0.6"); 358 assert(formatNumber(-0.06, 0) == "-0"); 359 assert(formatNumber(-0.06, 1) == "-0.1"); 360 assert(formatNumber(-0.06, 2) == "-0.06"); 361 assert(formatNumber(-0.06, 3) == "-0.06"); 362 assert(formatNumber(-9.49999, 0) == "-9"); 363 assert(formatNumber(-9.49999, 1) == "-9.5"); 364 assert(formatNumber(-9.6, 0) == "-10"); 365 assert(formatNumber(-9.6, 1) == "-9.6"); 366 assert(formatNumber(-99.99, 0) == "-100"); 367 assert(formatNumber(-99.99, 1) == "-100"); 368 assert(formatNumber(-99.99, 2) == "-99.99"); 369 assert(formatNumber(-9999.9996, 3) == "-10000"); 370 assert(formatNumber(-9999.9996, 4) == "-9999.9996"); 371 assert(formatNumber(-99999.99996, 4) == "-100000"); 372 assert(formatNumber(-99999.99996, 5) == "-99999.99996"); 373 assert(formatNumber(-999999.999996, 5) == "-1000000"); 374 assert(formatNumber(-999999.999996, 6) == "-999999.999996"); 375 376 assert(formatNumber!(double, 0)(-999.123412, 0) == "-999"); 377 assert(formatNumber!(double, 0)(-999.123412, 1) == "-1e+03"); 378 assert(formatNumber!(double, 0)(-999.123412, 2) == "-1e+03"); 379 assert(formatNumber!(double, 0)(-999.123412, 3) == "-999"); 380 assert(formatNumber!(double, 0)(-999.123412, 4) == "-999.1"); 381 382 // Default number printing 383 assert(formatNumber(-1.2) == "-1.2"); 384 assert(formatNumber(-12.3) == "-12.3"); 385 assert(formatNumber(-12.34) == "-12.34"); 386 assert(formatNumber(-123.45) == "-123.45"); 387 assert(formatNumber(-123.456) == "-123.456"); 388 assert(formatNumber(-1234.567) == "-1234.567"); 389 assert(formatNumber(-1234.5678) == "-1234.5678"); 390 assert(formatNumber(-12345.6789) == "-12345.6789"); 391 assert(formatNumber(-12345.67891) == "-12345.67891"); 392 assert(formatNumber(-123456.78912) == "-123456.78912"); 393 assert(formatNumber(-123456.789123) == "-123456.789123"); 394 assert(formatNumber(-1234567.891234) == "-1234567.89123"); 395 396 assert(formatNumber(-12345678.912345) == "-12345678.9123"); 397 assert(formatNumber(-123456789.12345) == "-123456789.123"); 398 assert(formatNumber(-1234567891.2345) == "-1234567891.23"); 399 assert(formatNumber(-12345678912.345) == "-12345678912.3"); 400 assert(formatNumber(-123456789123.45) == "-123456789123"); 401 assert(formatNumber(-1234567891234.5) == "-1.23456789123e+12"); 402 assert(formatNumber(-12345678912345.6) == "-1.23456789123e+13"); 403 assert(formatNumber(-123456789123456.0) == "-123456789123456"); 404 405 assert(formatNumber(-0.3) == "-0.3"); 406 assert(formatNumber(-0.03) == "-0.03"); 407 assert(formatNumber(-0.003) == "-0.003"); 408 assert(formatNumber(-0.0003) == "-0.0003"); 409 assert(formatNumber(-0.00003) == "-3e-05" || formatNumber(-0.00003) == "-3e-5"); 410 assert(formatNumber(-0.000003) == "-3e-06" || formatNumber(-0.000003) == "-3e-6"); 411 assert(formatNumber(-0.0000003) == "-3e-07" || formatNumber(-0.0000003) == "-3e-7"); 412 413 const double dlargeNeg = -2.0^^(double.mant_dig - 2) + 10.0; 414 immutable double dhugeNeg = -2.0^^(double.mant_dig + 1) - 1000.0; 415 416 assert(dlargeNeg.formatNumber == format("%d", dlargeNeg.to!long)); 417 assert(dhugeNeg.formatNumber!(double) == format("%.12g", dhugeNeg)); 418 419 // Type qualifiers 420 const double b1 = 0.0; assert(formatNumber(b1) == "0"); 421 const double b2 = 0.2; assert(formatNumber(b2) == "0.2"); 422 const double b3 = 0.123412; assert(formatNumber(b3, 2) == "0.12"); 423 immutable double b4 = 99.123412; assert(formatNumber(b4, 5) == "99.12341"); 424 immutable double b5 = 99.123412; assert(formatNumber(b5, 7) == "99.12341"); 425 426 // Special values 427 assert(formatNumber(double.nan) == "nan"); 428 assert(formatNumber(double.nan, 0) == "nan"); 429 assert(formatNumber(double.nan, 1) == "nan"); 430 assert(formatNumber(double.nan, 9) == "nan"); 431 assert(formatNumber(double.infinity) == "inf"); 432 assert(formatNumber(double.infinity, 0) == "inf"); 433 assert(formatNumber(double.infinity, 1) == "inf"); 434 assert(formatNumber(double.infinity, 9) == "inf"); 435 436 // Float. Mix negative and type qualifiers in. 437 assert(formatNumber(0.0f) == "0"); 438 assert(formatNumber(0.5f) == "0.5"); 439 assert(formatNumber(0.123412f, 0) == "0"); 440 assert(formatNumber(0.123412f, 1) == "0.1"); 441 assert(formatNumber(-0.123412f, 2) == "-0.12"); 442 assert(formatNumber(9.123412f, 5) == "9.12341"); 443 assert(formatNumber(9.123412f, 6) == "9.123412"); 444 assert(formatNumber(-99.123412f, 5) == "-99.12341"); 445 assert(formatNumber(99.123412f, 7) == "99.12341"); 446 assert(formatNumber(-999.123412f, 5) == "-999.12341"); 447 448 float c1 = 999.123412f; assert(formatNumber(c1, 7) == "999.1234"); 449 float c2 = 999.1234f; assert(formatNumber!(float, 9)(c2, 3) == "999.123"); 450 const float c3 = 9001.0f; assert(formatNumber(c3) == "9001"); 451 const float c4 = -12345678.0f; assert(formatNumber(c4) == "-12345678"); 452 immutable float c5 = 12345678.0f; assert(formatNumber(c5, 0) == "12345678"); 453 immutable float c6 = 12345678.0f; assert(formatNumber(c6, 1) == "12345678"); 454 455 float flarge = 2.0^^(float.mant_dig - 2) - 10.0; 456 float fhuge = 2.0^^(float.mant_dig + 1) + 1000.0; 457 458 assert(flarge.formatNumber == format("%d", flarge.to!long)); 459 assert(fhuge.formatNumber!(float, 12) == format("%.12g", fhuge)); 460 461 // Reals - No special formatting 462 real d1 = 2.0^^(double.mant_dig) - 1000.0; assert(formatNumber(d1) == format("%.12g", d1)); 463 real d2 = 123456789.12341234L; assert(formatNumber(d2, 12) == format("%.12g", d2)); 464 } 465 466 /** 467 rangeMedian. Finds the median. Modifies the range via topN or sort in the process. 468 469 Note: topN is the preferred algorithm, but the version prior to Phobos 2.073 470 is pathologically slow on certain data sets. Use topN in 2.073 and later, 471 sort in earlier versions. 472 473 See: https://issues.dlang.org/show_bug.cgi?id=16517 474 https://github.com/dlang/phobos/pull/4815 475 http://forum.dlang.org/post/ujuugklmbibuheptdwcn@forum.dlang.org 476 */ 477 static if (__VERSION__ >= 2073) 478 { 479 version = rangeMedianViaTopN; 480 } 481 else 482 { 483 version = rangeMedianViaSort; 484 } 485 486 auto rangeMedian (Range) (Range r) 487 if (isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range) 488 { 489 version(rangeMedianViaSort) 490 { 491 version(rangeMedianViaTopN) 492 { 493 assert(0, "Both rangeMedianViaSort and rangeMedianViaTopN assigned as versions. Assign only one."); 494 } 495 } 496 else version(rangeMedianViaTopN) 497 { 498 } 499 else 500 { 501 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned."); 502 } 503 504 import std.traits : isFloatingPoint; 505 506 ElementType!Range median; 507 508 if (r.length > 0) 509 { 510 size_t medianIndex = r.length / 2; 511 512 version(rangeMedianViaSort) 513 { 514 import std.algorithm : sort; 515 sort(r); 516 median = r[medianIndex]; 517 518 static if (isFloatingPoint!(ElementType!Range)) 519 { 520 if (r.length % 2 == 0) 521 { 522 /* Even number of values. Split the difference. */ 523 median = (median + r[medianIndex - 1]) / 2.0; 524 } 525 } 526 } 527 else version(rangeMedianViaTopN) 528 { 529 import std.algorithm : maxElement, topN; 530 topN(r, medianIndex); 531 median = r[medianIndex]; 532 533 static if (isFloatingPoint!(ElementType!Range)) 534 { 535 if (r.length % 2 == 0) 536 { 537 /* Even number of values. Split the difference. */ 538 if (r[medianIndex - 1] < median) 539 { 540 median = (median + r[0..medianIndex].maxElement) / 2.0; 541 } 542 } 543 } 544 } 545 else 546 { 547 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned."); 548 } 549 } 550 551 return median; 552 } 553 554 /* rangeMedian unit tests. */ 555 @safe unittest 556 { 557 import std.math : isNaN; 558 import std.algorithm : all, permutations; 559 560 // Median of empty range is (type).init. Zero for int, nan for floats/doubles 561 assert(rangeMedian(new int[0]) == int.init); 562 assert(rangeMedian(new double[0]).isNaN && double.init.isNaN); 563 assert(rangeMedian(new string[0]) == ""); 564 565 assert(rangeMedian([3]) == 3); 566 assert(rangeMedian([3.0]) == 3.0); 567 assert(rangeMedian([3.5]) == 3.5); 568 assert(rangeMedian(["aaa"]) == "aaa"); 569 570 /* Even number of elements: Split the difference for floating point, but not other types. */ 571 assert(rangeMedian([3, 4]) == 4); 572 assert(rangeMedian([3.0, 4.0]) == 3.5); 573 574 assert(rangeMedian([3, 6, 12]) == 6); 575 assert(rangeMedian([3.0, 6.5, 12.5]) == 6.5); 576 577 // Do the rest with permutations 578 assert([4, 7].permutations.all!(x => (x.rangeMedian == 7))); 579 assert([4.0, 7.0].permutations.all!(x => (x.rangeMedian == 5.5))); 580 assert(["aaa", "bbb"].permutations.all!(x => (x.rangeMedian == "bbb"))); 581 582 assert([4, 7, 19].permutations.all!(x => (x.rangeMedian == 7))); 583 assert([4.5, 7.5, 19.5].permutations.all!(x => (x.rangeMedian == 7.5))); 584 assert(["aaa", "bbb", "ccc"].permutations.all!(x => (x.rangeMedian == "bbb"))); 585 586 assert([4.5, 7.5, 19.5, 21.0].permutations.all!(x => (x.rangeMedian == 13.5))); 587 assert([4.5, 7.5, 19.5, 20.5, 36.0].permutations.all!(x => (x.rangeMedian == 19.5))); 588 assert([4.5, 7.5, 19.5, 24.0, 24.5, 25.0].permutations.all!(x => (x.rangeMedian == 21.75))); 589 assert([1.5, 3.25, 3.55, 4.5, 24.5, 25.0, 25.6].permutations.all!(x => (x.rangeMedian == 4.5))); 590 } 591 592 /// Quantiles 593 594 /** The different quantile interpolation methods. 595 * See: https://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html 596 */ 597 enum QuantileInterpolation 598 { 599 R1 = 1, /// R quantile type 1 600 R2 = 2, /// R quantile type 2 601 R3 = 3, /// R quantile type 3 602 R4 = 4, /// R quantile type 4 603 R5 = 5, /// R quantile type 5 604 R6 = 6, /// R quantile type 6 605 R7 = 7, /// R quantile type 7 606 R8 = 8, /// R quantile type 8 607 R9 = 9, /// R quantile type 9 608 } 609 610 611 import std.traits : isFloatingPoint, isNumeric, Unqual; 612 import std.range; 613 614 /** 615 Returns the quantile in a data vector for a cumulative probability. 616 617 Takes a data vector and a probability and returns the quantile cut point for the 618 probability. The vector must be sorted and the probability in the range [0.0, 1.0]. 619 The interpolation methods available are the same as in R and available in a number 620 of statistical packages. See the R documentation or wikipedia for details 621 (https://en.wikipedia.org/wiki/Quantile). 622 623 Examples: 624 ---- 625 double data = [22, 57, 73, 97, 113]; 626 double median = quantile(0.5, data); // 73 627 auto q1 = [0.25, 0.5, 0.75].map!(p => p.quantile(data)); // 57, 73, 97 628 auto q2 = [0.25, 0.5, 0.75].map!(p => p.quantile(data), QuantileInterpolation.R8); //45.3333, 73, 102.333 629 ---- 630 */ 631 double quantile(ProbType, Range) 632 (const ProbType prob, Range data, QuantileInterpolation method = QuantileInterpolation.R7) 633 if (isRandomAccessRange!Range && hasLength!Range && isNumeric!(ElementType!Range) && 634 isFloatingPoint!ProbType) 635 in 636 { 637 import std.algorithm : isSorted; 638 assert(0.0 <= prob && prob <= 1.0); 639 assert(method >= QuantileInterpolation.min && method <= QuantileInterpolation.max); 640 assert(data.isSorted); 641 } 642 do 643 { 644 import core.stdc.math : modf; 645 import std.algorithm : max, min; 646 import std.conv : to; 647 import std.math : ceil, lrint; 648 649 /* Note: In the implementation below, 'h1' is the 1-based index into the data vector. 650 * This follows the wikipedia notation for the interpolation methods. One will be 651 * subtracted before the vector is accessed. 652 */ 653 654 double q = double.nan; // The return value. 655 656 if (data.length == 1) q = data[0].to!double; 657 else if (data.length > 1) 658 { 659 if (method == QuantileInterpolation.R1) 660 { 661 q = data[((data.length * prob).ceil - 1.0).to!long.max(0).to!size_t].to!double; 662 } 663 else if (method == QuantileInterpolation.R2) 664 { 665 immutable double h1 = data.length * prob + 0.5; 666 immutable size_t lo = ((h1 - 0.5).ceil.to!long - 1).max(0).to!size_t; 667 immutable size_t hi = ((h1 + 0.5).to!size_t - 1).min(data.length - 1); 668 q = (data[lo].to!double + data[hi].to!double) / 2.0; 669 } 670 else if (method == QuantileInterpolation.R3) 671 { 672 /* Implementation notes: 673 * - R3 uses 'banker's rounding', where 0.5 is rounded to the nearest even 674 * value. The 'lrint' routine does this. 675 * - DMD will sometimes choose the incorrect 0.5 rounding if the calculation 676 * is done as a single step. The separate calculation of 'h1' avoids this. 677 */ 678 immutable double h1 = data.length * prob; 679 q = data[h1.lrint.max(1).to!size_t - 1].to!double; 680 } 681 else if ((method == QuantileInterpolation.R4) || 682 (method == QuantileInterpolation.R5) || 683 (method == QuantileInterpolation.R6) || 684 (method == QuantileInterpolation.R7) || 685 (method == QuantileInterpolation.R8) || 686 (method == QuantileInterpolation.R9)) 687 { 688 /* Methods 4-9 have different formulas for generating the real-valued index, 689 * but work the same after that, choosing the final value by linear interpolation. 690 */ 691 double h1; 692 switch (method) 693 { 694 case QuantileInterpolation.R4: h1 = data.length * prob; break; 695 case QuantileInterpolation.R5: h1 = data.length * prob + 0.5; break; 696 case QuantileInterpolation.R6: h1 = (data.length + 1) * prob; break; 697 case QuantileInterpolation.R7: h1 = (data.length - 1) * prob + 1.0; break; 698 case QuantileInterpolation.R8: h1 = (data.length.to!double + 1.0/3.0) * prob + 1.0/3.0; break; 699 case QuantileInterpolation.R9: h1 = (data.length + 0.25) * prob + 3.0/8.0; break; 700 default: assert(0); 701 } 702 703 double h1IntegerPart; 704 immutable double h1FractionPart = modf(h1, &h1IntegerPart); 705 immutable size_t lo = (h1IntegerPart - 1.0).to!long.max(0).min(data.length - 1).to!size_t; 706 q = data[lo]; 707 if (h1FractionPart > 0.0) 708 { 709 immutable size_t hi = h1IntegerPart.to!long.min(data.length - 1).to!size_t; 710 q += h1FractionPart * (data[hi].to!double - data[lo].to!double); 711 } 712 } 713 else assert(0); 714 } 715 return q; 716 } 717 718 unittest 719 { 720 import std.algorithm : equal, map; 721 import std.array : array; 722 import std.traits : EnumMembers; 723 724 /* A couple simple tests. */ 725 assert(quantile(0.5, [22, 57, 73, 97, 113]) == 73); 726 assert(quantile(0.5, [22.5, 57.5, 73.5, 97.5, 113.5]) == 73.5); 727 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113])).array == [57.0, 73.0, 97.0]); 728 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113], QuantileInterpolation.R1)).array == [57.0, 73.0, 97.0]); 729 730 /* Data arrays. */ 731 double[] d1 = []; 732 double[] d2 = [5.5]; 733 double[] d3 = [0.0, 1.0]; 734 double[] d4 = [-1.0, 1.0]; 735 double[] d5 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 736 double[] d6 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]; 737 double[] d7 = [ 31.79, 64.19, 81.77]; 738 double[] d8 = [-94.43, -74.55, -50.81, 27.45, 78.79]; 739 double[] d9 = [-89.17, 20.93, 38.51, 48.03, 76.43, 77.02]; 740 double[] d10 = [-99.53, -76.87, -76.69, -67.81, -40.26, -11.29, 21.02]; 741 double[] d11 = [-78.32, -52.22, -50.86, 13.45, 15.96, 17.25, 46.35, 85.00]; 742 double[] d12 = [-81.36, -70.87, -53.56, -42.14, -9.18, 7.23, 49.52, 80.43, 98.50]; 743 double[] d13 = [ 38.37, 44.36, 45.70, 50.69, 51.36, 55.66, 56.91, 58.95, 62.01, 65.25]; 744 745 /* Spot check a few other data types. Same expected outputs.*/ 746 int[] d3Int = [0, 1]; 747 int[] d4Int = [-1, 1]; 748 int[] d5Int = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 749 size_t[] d6Size_t = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; 750 float[] d7Float = [ 31.79f, 64.19f, 81.77f]; 751 float[] d8Float = [-94.43f, -74.55f, -50.81f, 27.45f, 78.79f]; 752 float[] d9Float = [-89.17f, 20.93f, 38.51f, 48.03f, 76.43f, 77.02f]; 753 float[] d10Float = [-99.53f, -76.87f, -76.69f, -67.81f, -40.26f, -11.29f, 21.02f]; 754 755 /* Probability values. */ 756 double[] probs = [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0]; 757 758 /* Expected values for each data array, for 'probs'. One expected result for each of the nine methods. 759 * The expected values were generated by R and Octave. 760 */ 761 double[13][9] d1_expected; // All values double.nan, the default. 762 double[13][9] d2_expected = [ 763 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 764 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 765 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 766 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 767 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 768 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 769 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 770 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 771 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 772 ]; 773 double[13][9] d3_expected = [ 774 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 775 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 776 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], 777 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0], 778 [0.0, 0.0, 0.0, 0.0, 0.3, 0.48, 0.5, 0.52, 1.0, 1.0, 1.0, 1.0, 1.0], 779 [0.0, 0.0, 0.0, 0.0, 0.2, 0.47, 0.5, 0.53, 1.0, 1.0, 1.0, 1.0, 1.0], 780 [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0], 781 [0.0, 0.0, 0.0, 0.0, 0.2666667, 0.4766667, 0.5, 0.5233333, 1.0, 1.0, 1.0, 1.0, 1.0], 782 [0.0, 0.0, 0.0, 0.0, 0.275, 0.4775, 0.5, 0.5225, 1.0, 1.0, 1.0, 1.0, 1.0], 783 ]; 784 double[13][9] d4_expected = [ 785 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 786 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 787 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 788 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -0.96, 0.0, 0.6, 0.8, 0.92, 1.0], 789 [-1.0, -1.0, -1.0, -1.0, -0.4, -0.04, 0.0, 0.04, 1.0, 1.0, 1.0, 1.0, 1.0], 790 [-1.0, -1.0, -1.0, -1.0, -0.6, -0.06, 0.0, 0.06, 1.0, 1.0, 1.0, 1.0, 1.0], 791 [-1.0, -0.9, -0.8, -0.5, -0.2, -0.02, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0], 792 [-1.0, -1.0, -1.0, -1.0, -0.4666667, -0.04666667, -4.440892e-16, 0.04666667, 1.0, 1.0, 1.0, 1.0, 1.0], 793 [-1.0, -1.0, -1.0, -1.0, -0.45, -0.045, 0.0, 0.045, 1.0, 1.0, 1.0, 1.0, 1.0], 794 ]; 795 double[13][9] d5_expected = [ 796 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0], 797 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0], 798 [0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 5.0, 7.0, 9.0, 9.0, 10.0, 10.0], 799 [0.0, 0.0, 0.1, 1.75, 3.4, 4.39, 4.5, 4.61, 7.25, 8.9, 9.45, 9.78, 10.0], 800 [0.0, 0.05, 0.6, 2.25, 3.9, 4.89, 5.0, 5.11, 7.75, 9.4, 9.95, 10.0, 10.0], 801 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.0, 10.0, 10.0], 802 [0.0, 0.5, 1.0, 2.5, 4.0, 4.9, 5.0, 5.1, 7.5, 9.0, 9.5, 9.8, 10.0], 803 [0.0, 0.0, 0.4666667, 2.166667, 3.866667, 4.886667, 5.0, 5.113333, 7.833333, 9.533333, 10.0, 10.0, 10.0], 804 [0.0, 0.0, 0.5, 2.1875, 3.875, 4.8875, 5.0, 5.1125, 7.8125, 9.5, 10.0, 10.0, 10.0], 805 ]; 806 double[13][9] d6_expected = [ 807 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 6.0, 8.0, 10.0, 11.0, 11.0, 11.0], 808 [0.0, 0.0, 1.0, 2.5, 4.0, 5.0, 5.5, 6.0, 8.5, 10.0, 11.0, 11.0, 11.0], 809 [0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 10.0, 10.0, 11.0, 11.0], 810 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.4, 10.76, 11.0], 811 [0.0, 0.1, 0.7, 2.5, 4.3, 5.38, 5.5, 5.62, 8.5, 10.3, 10.9, 11.0, 11.0], 812 [0.0, 0.0, 0.3, 2.25, 4.2, 5.37, 5.5, 5.63, 8.75, 10.7, 11.0, 11.0, 11.0], 813 [0.0, 0.55, 1.1, 2.75, 4.4, 5.39, 5.5, 5.61, 8.25, 9.9, 10.45, 10.78, 11.0], 814 [0.0, 0.0, 0.5666667, 2.416667, 4.266667, 5.376667, 5.5, 5.623333, 8.583333, 10.43333, 11.0, 11.0, 11.0], 815 [0.0, 0.0, 0.6, 2.4375, 4.275, 5.3775, 5.5, 5.6225, 8.5625, 10.4, 11.0, 11.0, 11.0], 816 ]; 817 double[13][9] d7_expected = [ 818 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77], 819 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77], 820 [31.79, 31.79, 31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77], 821 [31.79, 31.79, 31.79, 31.79, 38.27, 47.018, 47.99, 48.962, 68.585, 76.496, 79.133, 80.7152, 81.77], 822 [31.79, 31.79, 31.79, 39.89, 54.47, 63.218, 64.19, 64.7174, 77.375, 81.77, 81.77, 81.77, 81.77], 823 [31.79, 31.79, 31.79, 31.79, 51.23, 62.894, 64.19, 64.8932, 81.77, 81.77, 81.77, 81.77, 81.77], 824 [31.79, 35.03, 38.27, 47.99, 57.71, 63.542, 64.19, 64.5416, 72.98, 78.254, 80.012, 81.0668, 81.77], 825 [31.79, 31.79, 31.79, 37.19, 53.39, 63.11, 64.19, 64.776, 78.84, 81.77, 81.77, 81.77, 81.77], 826 [31.79, 31.79, 31.79, 37.865, 53.66, 63.137, 64.19, 64.76135, 78.47375, 81.77, 81.77, 81.77, 81.77], 827 ]; 828 double[13][9] d8_expected = [ 829 [-94.43, -94.43, -94.43, -74.55, -74.55, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79], 830 [-94.43, -94.43, -94.43, -74.55, -62.68, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79], 831 [-94.43, -94.43, -94.43, -94.43, -74.55, -74.55, -74.55, -50.81, 27.45, 27.45, 78.79, 78.79, 78.79], 832 [-94.43, -94.43, -94.43, -89.46, -74.55, -63.867, -62.68, -61.493, 7.885, 53.12, 65.955, 73.656, 78.79], 833 [-94.43, -94.43, -94.43, -79.52, -62.68, -51.997, -50.81, -46.897, 40.285, 78.79, 78.79, 78.79, 78.79], 834 [-94.43, -94.43, -94.43, -84.49, -65.054, -52.2344, -50.81, -46.1144, 53.12, 78.79, 78.79, 78.79, 78.79], 835 [-94.43, -90.454, -86.478, -74.55, -60.306, -51.7596, -50.81, -47.6796, 27.45, 58.254, 68.522, 74.6828, 78.79], 836 [-94.43, -94.43, -94.43, -81.17667, -63.47133, -52.07613, -50.81, -46.63613, 44.56333, 78.79, 78.79, 78.79, 78.79], 837 [-94.43, -94.43, -94.43, -80.7625, -63.2735, -52.05635, -50.81, -46.70135, 43.49375, 78.79, 78.79, 78.79, 78.79], 838 ]; 839 double[13][9] d9_expected = [ 840 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02], 841 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 43.27, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02], 842 [-89.17, -89.17, -89.17, 20.93, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02], 843 [-89.17, -89.17, -89.17, -34.12, 27.962, 37.4552, 38.51, 39.0812, 62.23, 76.666, 76.843, 76.9492, 77.02], 844 [-89.17, -89.17, -78.16, 20.93, 36.752, 42.6988, 43.27, 43.8412, 76.43, 76.961, 77.02, 77.02, 77.02], 845 [-89.17, -89.17, -89.17, -6.595, 34.994, 42.6036, 43.27, 43.9364, 76.5775, 77.02, 77.02, 77.02, 77.02], 846 [-89.17, -61.645, -34.12, 25.325, 38.51, 42.794, 43.27, 43.746, 69.33, 76.725, 76.8725, 76.961, 77.02], 847 [-89.17, -89.17, -89.17, 11.755, 36.166, 42.66707, 43.27, 43.87293, 76.47917, 77.02, 77.02, 77.02, 77.02], 848 [-89.17, -89.17, -89.17, 14.04875, 36.3125, 42.675, 43.27, 43.865, 76.46688, 77.02, 77.02, 77.02, 77.02], 849 ]; 850 double[13][9] d10_expected = [ 851 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02], 852 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02], 853 [-99.53, -99.53, -99.53, -76.87, -76.69, -76.69, -67.81, -67.81, -40.26, -11.29, 21.02, 21.02, 21.02], 854 [-99.53, -99.53, -99.53, -82.535, -76.726, -72.8716, -72.25, -71.6284, -33.0175, -1.597, 9.7115, 16.4966, 21.02], 855 [-99.53, -99.53, -94.998, -76.825, -74.026, -68.4316, -67.81, -65.8815, -18.5325, 14.558, 21.02, 21.02, 21.02], 856 [-99.53, -99.53, -99.53, -76.87, -74.914, -68.5204, -67.81, -65.606, -11.29, 21.02, 21.02, 21.02, 21.02], 857 [-99.53, -92.732, -85.934, -76.78, -73.138, -68.3428, -67.81, -66.157, -25.775, 1.634, 11.327, 17.1428, 21.02], 858 [-99.53, -99.53, -98.01933, -76.84, -74.322, -68.4612, -67.81, -65.78967, -16.11833, 18.866, 21.02, 21.02, 21.02], 859 [-99.53, -99.53, -97.264, -76.83625, -74.248, -68.4538, -67.81, -65.81263, -16.72187, 17.789, 21.02, 21.02, 21.02], 860 ]; 861 double[13][9] d11_expected = [ 862 [-78.32, -78.32, -78.32, -52.22, 13.45, 13.45, 13.45, 15.96, 17.25, 85.0, 85.0, 85.0, 85.0], 863 [-78.32, -78.32, -78.32, -51.54, 13.45, 13.45, 14.705, 15.96, 31.8, 85.0, 85.0, 85.0, 85.0], 864 [-78.32, -78.32, -78.32, -52.22, -50.86, 13.45, 13.45, 13.45, 17.25, 46.35, 85.0, 85.0, 85.0], 865 [-78.32, -78.32, -78.32, -52.22, -37.998, 8.3052, 13.45, 13.6508, 17.25, 54.08, 69.54, 78.816, 85.0], 866 [-78.32, -78.32, -70.49, -51.54, -5.843, 14.5042, 14.705, 14.9058, 31.8, 73.405, 85.0, 85.0, 85.0], 867 [-78.32, -78.32, -78.32, -51.88, -12.274, 14.4791, 14.705, 14.9309, 39.075, 85.0, 85.0, 85.0, 85.0], 868 [-78.32, -69.185, -60.05, -51.2, 0.588, 14.5293, 14.705, 14.8807, 24.525, 57.945, 71.4725, 79.589, 85.0], 869 [-78.32, -78.32, -73.97, -51.65333, -7.986667, 14.49583, 14.705, 14.91417, 34.225, 78.55833, 85.0, 85.0, 85.0], 870 [-78.32, -78.32, -73.1, -51.625, -7.45075, 14.49792, 14.705, 14.91208, 33.61875, 77.27, 85.0, 85.0, 85.0], 871 ]; 872 double[13][9] d12_expected = [ 873 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5], 874 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5], 875 [-81.36, -81.36, -81.36, -70.87, -42.14, -42.14, -42.14, -9.18, 49.52, 80.43, 98.5, 98.5, 98.5], 876 [-81.36, -81.36, -81.36, -66.5425, -46.708, -28.6264, -25.66, -22.6936, 38.9475, 82.237, 90.3685, 95.2474, 98.5], 877 [-81.36, -81.36, -77.164, -57.8875, -38.844, -12.1464, -9.18, -7.7031, 57.2475, 91.272, 98.5, 98.5, 98.5], 878 [-81.36, -81.36, -81.36, -62.215, -42.14, -12.476, -9.18, -7.539, 64.975, 98.5, 98.5, 98.5, 98.5], 879 [-81.36, -77.164, -72.968, -53.56, -35.548, -11.8168, -9.18, -7.8672, 49.52, 84.044, 91.272, 95.6088, 98.5], 880 [-81.36, -81.36, -78.56267, -59.33, -39.94267, -12.25627, -9.18, -7.6484, 59.82333, 93.68133, 98.5, 98.5, 98.5], 881 [-81.36, -81.36, -78.213, -58.96938, -39.668, -12.2288, -9.18, -7.662075, 59.17938, 93.079, 98.5, 98.5, 98.5], 882 ]; 883 double[13][9] d13_expected = [ 884 [38.37, 38.37, 38.37, 45.7, 50.69, 51.36, 51.36, 55.66, 58.95, 62.01, 65.25, 65.25, 65.25], 885 [38.37, 38.37, 41.365, 45.7, 51.025, 51.36, 53.51, 55.66, 58.95, 63.63, 65.25, 65.25, 65.25], 886 [38.37, 38.37, 38.37, 44.36, 50.69, 51.36, 51.36, 51.36, 58.95, 62.01, 65.25, 65.25, 65.25], 887 [38.37, 38.37, 38.37, 45.03, 50.69, 51.293, 51.36, 51.79, 57.93, 62.01, 63.63, 64.602, 65.25], 888 [38.37, 38.37, 41.365, 45.7, 51.025, 53.08, 53.51, 53.94, 58.95, 63.63, 65.25, 65.25, 65.25], 889 [38.37, 38.37, 38.969, 45.365, 50.958, 53.037, 53.51, 53.983, 59.715, 64.926, 65.25, 65.25, 65.25], 890 [38.37, 41.0655, 43.761, 46.9475, 51.092, 53.123, 53.51, 53.897, 58.44, 62.334, 63.792, 64.6668, 65.25], 891 [38.37, 38.37, 40.56633, 45.58833, 51.00267, 53.06567, 53.51, 53.95433, 59.205, 64.062, 65.25, 65.25, 65.25], 892 [38.37, 38.37, 40.766, 45.61625, 51.00825, 53.06925, 53.51, 53.95075, 59.14125, 63.954, 65.25, 65.25, 65.25], 893 ]; 894 895 void compareResults(const double[] actual, const double[] expected, string dataset, QuantileInterpolation method) 896 { 897 import std.conv : to; 898 import std.format : format; 899 import std.math : isNaN; 900 import std.range : lockstep; 901 902 // Expected results are from printed representations (from R and Octave) and 903 // are not at double precision accuracy. Use a relaxed equality test at more 904 // expected float precision. Eventually need more precise expected values. 905 // Some tests make double-float-double round-trips. These need float precision. 906 907 double maxRelDiff = 4.0 * float.epsilon.to!double; 908 double maxAbsDiff = 2.0 * double.epsilon; 909 910 foreach (i, actualValue, expectedValue; lockstep(actual, expected)) 911 { 912 assert(nearEqual!(double, Yes.areNaNsEqual)(actualValue, expectedValue, maxRelDiff, maxAbsDiff), 913 format("Quantile unit test failure, dataset %s, method: %s, index: %d, expected: %.32g, actual: %.32g", 914 dataset, method.to!string, i, expectedValue, actualValue)); 915 } 916 } 917 918 foreach(methodIndex, method; EnumMembers!QuantileInterpolation) 919 { 920 compareResults(probs.map!(p => p.quantile(d1, method)).array, d1_expected[methodIndex], "d1", method); 921 compareResults(probs.map!(p => p.quantile(d2, method)).array, d2_expected[methodIndex], "d2", method); 922 compareResults(probs.map!(p => p.quantile(d3, method)).array, d3_expected[methodIndex], "d3", method); 923 compareResults(probs.map!(p => p.quantile(d3Int, method)).array, d3_expected[methodIndex], "d3Int", method); 924 compareResults(probs.map!(p => p.quantile(d4, method)).array, d4_expected[methodIndex], "d4", method); 925 compareResults(probs.map!(p => p.quantile(d4Int, method)).array, d4_expected[methodIndex], "d4Int", method); 926 compareResults(probs.map!(p => p.quantile(d5, method)).array, d5_expected[methodIndex], "d5", method); 927 compareResults(probs.map!(p => p.quantile(d5Int, method)).array, d5_expected[methodIndex], "d5Int", method); 928 compareResults(probs.map!(p => p.quantile(d6, method)).array, d6_expected[methodIndex], "d6", method); 929 compareResults(probs.map!(p => p.quantile(d6Size_t, method)).array, d6_expected[methodIndex], "d6Size_t", method); 930 compareResults(probs.map!(p => p.quantile(d7, method)).array, d7_expected[methodIndex], "d7", method); 931 compareResults(probs.map!(p => p.quantile(d7Float, method)).array, d7_expected[methodIndex], "d7Float", method); 932 compareResults(probs.map!(p => p.quantile(d8, method)).array, d8_expected[methodIndex], "d8", method); 933 compareResults(probs.map!(p => p.quantile(d8Float, method)).array, d8_expected[methodIndex], "d8Float", method); 934 compareResults(probs.map!(p => p.quantile(d9, method)).array, d9_expected[methodIndex], "d9", method); 935 compareResults(probs.map!(p => p.quantile(d9Float, method)).array, d9_expected[methodIndex], "d9Float", method); 936 compareResults(probs.map!(p => p.quantile(d10, method)).array, d10_expected[methodIndex], "d10", method); 937 compareResults(probs.map!(p => p.quantile(d10Float, method)).array, d10_expected[methodIndex], "d10Float", method); 938 compareResults(probs.map!(p => p.quantile(d11, method)).array, d11_expected[methodIndex], "d11", method); 939 compareResults(probs.map!(p => p.quantile(d12, method)).array, d12_expected[methodIndex], "d12", method); 940 compareResults(probs.map!(p => p.quantile(d13, method)).array, d13_expected[methodIndex], "d13", method); 941 } 942 } 943 /** Flag use by the nearEqual template. */ 944 alias AreNaNsEqual = Flag!"areNaNsEqual"; 945 946 /** 947 nearEqual checks if two floating point numbers are "near equal". 948 949 nearEqual is an alternative to the approxEqual and isClose functions in the Phobos 950 standard library. It should be regarded as experimental. Currently it is used only in 951 unit tests. 952 953 Default relative diff tolerance is small. Absolute diff tolerance is also small, but 954 non-zero. This means comparing number near zero to zero will considered near equal by 955 defaults. 956 957 Default tolerances will not suffice for float-to-double conversion. Use tolerances 958 based on float in these cases. 959 */ 960 bool nearEqual(T, AreNaNsEqual naNsAreEqual = No.areNaNsEqual) 961 (T x, T y, T maxRelDiff = 4.0 * T.epsilon, T maxAbsDiff = T.epsilon) 962 if (isFloatingPoint!T) 963 { 964 import std.algorithm : max; 965 import std.math : abs, isNaN; 966 967 if (x == y) return true; 968 969 static if (naNsAreEqual) if (x.isNaN && y.isNaN) return true; 970 971 if (x.isNaN || y.isNaN) return false; 972 973 immutable absDiff = abs(x - y); 974 975 if (absDiff <= maxAbsDiff) return true; 976 977 immutable relDiff = absDiff / max(abs(x), abs(y)); 978 979 return (relDiff <= maxRelDiff); 980 } 981 982 @safe unittest 983 { 984 enum float defaultMaxRelDiffFloat = 4 * float.epsilon; 985 enum double defaultMaxRelDiffDouble = 4 * double.epsilon; 986 987 // +0.0 and -0.0 are always equal, even with zero relative and absolute diffs. 988 assert(nearEqual(0.0f, 0.0f, 0.0f, 0.0f)); 989 assert(nearEqual(-0.0f, +0.0f, 0.0f, 0.0f)); 990 assert(nearEqual(0.0, 0.0, 0.0, 0.0)); 991 assert(nearEqual(-0.0, +0.0, 0.0, 0.0)); 992 993 // NaNs are equal or not depending on template parameters 994 assert(!nearEqual(float.nan, float.nan, 0.0, 0.0)); 995 assert(!nearEqual(double.nan, double.nan, 0.0, 0.0)); 996 assert(nearEqual!(float, Yes.areNaNsEqual)(float.nan, float.nan, 0.0, 0.0)); 997 assert(nearEqual!(double, Yes.areNaNsEqual)(double.nan, double.nan, 0.0, 0.0)); 998 999 // Infinity tests 1000 assert(nearEqual(float.infinity, float.infinity, 0.0f, 0.0f)); 1001 assert(nearEqual(-float.infinity, -float.infinity, 0.0f, 0.0f)); 1002 assert(!nearEqual(-float.infinity, float.infinity, 0.0f, 0.0f)); 1003 1004 assert(nearEqual(double.infinity, double.infinity, 0.0f, 0.0f)); 1005 assert(nearEqual(-double.infinity, -double.infinity, 0.0f, 0.0f)); 1006 assert(!nearEqual(-double.infinity, double.infinity, 0.0f, 0.0f)); 1007 1008 assert(!nearEqual(float.infinity, float.max, 0.0f, 0.0f)); 1009 assert(!nearEqual(-float.infinity, -float.max, 0.0f, 0.0f)); 1010 assert(!nearEqual(double.infinity, double.max, 0.0f, 0.0f)); 1011 assert(!nearEqual(-double.infinity, -double.max, 0.0f, 0.0f)); 1012 1013 // Near Zero tests 1014 assert(nearEqual(0.0f, float.min_normal)); 1015 assert(nearEqual(0.0f, -float.min_normal)); 1016 assert(nearEqual(0.0, double.min_normal)); 1017 assert(nearEqual(0.0, -double.min_normal)); 1018 1019 assert(!nearEqual(0.0f, float.min_normal, defaultMaxRelDiffFloat, 0.0f)); 1020 assert(!nearEqual(0.0f, -float.min_normal, defaultMaxRelDiffFloat, 0.0f)); 1021 assert(!nearEqual(0.0, double.min_normal, defaultMaxRelDiffDouble, 0.0)); 1022 assert(!nearEqual(0.0, -double.min_normal, defaultMaxRelDiffDouble, 0.0)); 1023 1024 // Tests with some different sizes 1025 assert(!nearEqual(1.11f, 1.110001f)); 1026 assert( nearEqual(1.11f, 1.1100001f)); 1027 1028 assert(!nearEqual(1.11, 1.11000000000001)); 1029 assert( nearEqual(1.11, 1.110000000000001)); 1030 1031 assert(!nearEqual(-1.11f, -1.110001f)); 1032 assert( nearEqual(-1.11f, -1.1100001f)); 1033 1034 assert(!nearEqual(-1.11, -1.11000000000001)); 1035 assert( nearEqual(-1.11, -1.110000000000001)); 1036 1037 assert(!nearEqual(3739.7f, 3739.69f)); 1038 assert( nearEqual(3739.7f, 3739.699f)); 1039 1040 assert(!nearEqual(3739.7, 3739.69999999999)); 1041 assert( nearEqual(3739.7, 3739.699999999999)); 1042 1043 assert(!nearEqual(732156983.0f, 732157367.0f)); // Delta 384 1044 assert( nearEqual(732156983.0f, 732157303.0f)); // Delta 320 1045 1046 assert(!nearEqual(732156983.0, 732156983.000001)); 1047 assert( nearEqual(732156983.0, 732156983.0000001)); 1048 1049 assert(!nearEqual(-0.0007370567f, -0.0007372567f)); 1050 assert( nearEqual(-0.0007370567f, -0.0007371567f)); 1051 1052 assert(!nearEqual(-0.0000007370567, -0.000000737056701)); 1053 assert( nearEqual(-0.0000007370567, -0.0000007370567001)); 1054 1055 // More near zero tests 1056 { 1057 float a = 1.0f; 1058 float b = 2.0f; 1059 float c = 3.0f; 1060 float x = (a + b) - c; 1061 1062 assert(nearEqual(x, 0.0f)); 1063 1064 foreach (i; 0 .. 3) 1065 { 1066 // Divide by ten and repeat the comparisons. 1067 a /= 10.0f; 1068 b /= 10.0f; 1069 c /= 10.0f; 1070 x /= 10.0f; 1071 1072 float y = (a + b) - c; 1073 1074 assert(nearEqual(x, 0.0f)); 1075 assert(nearEqual(y, 0.0f)); 1076 assert(nearEqual(x, y)); 1077 } 1078 } 1079 { 1080 double a = 1.0; 1081 double b = 2.0; 1082 double c = 3.0; 1083 double x = (a + b) - c; 1084 1085 assert(nearEqual(x, 0.0)); 1086 1087 foreach (i; 0 .. 3) 1088 { 1089 // Divide by ten and repeat the comparisons. 1090 a /= 10.0; 1091 b /= 10.0; 1092 c /= 10.0; 1093 x /= 10.0; 1094 1095 double y = (a + b) - c; 1096 1097 assert(nearEqual(x, 0.0)); 1098 assert(nearEqual(y, 0.0)); 1099 assert(nearEqual(x, y)); 1100 } 1101 } 1102 1103 // Double to double to float round trip. Use float tolerances. 1104 // Note: Don't use immutables. Compiler will preserve doubles. 1105 { 1106 import std.conv : to; 1107 1108 double q = 31.79; 1109 float rf = q; 1110 double r = rf; 1111 1112 assert(!nearEqual(q, r)); 1113 assert(nearEqual(q, r, defaultMaxRelDiffFloat.to!double)); 1114 } 1115 }