1 /** 2 Numeric related utilities used by TSV Utilities. 3 4 Utilities in this file: 5 $(LIST 6 * [formatNumber] - An alternate print format for numbers, especially useful when 7 doubles are being used to represent integer and float values. 8 9 * [rangeMedian] - Finds the median value of a range. 10 11 * [quantile] - Generates quantile values for a data set. 12 ) 13 14 Copyright (c) 2016-2020, eBay Inc. 15 Initially written by Jon Degenhardt 16 17 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt) 18 */ 19 20 module tsv_utils.common.numerics; 21 22 import std.traits : isFloatingPoint, isIntegral, Unqual; 23 24 /** 25 formatNumber is an alternate way to print numbers. It is especially useful when 26 representing both integral and floating point values with float point data types. 27 28 formatNumber was created for tsv-summarize, where all calculations are done as doubles, 29 but may be integers by nature. In addition, output may be either for human consumption 30 or for additional machine processing. Integers are printed normally. Floating point is 31 printed as follows: 32 $(LIST 33 * Values that are exact integral values are printed as integers, as long as they 34 are within the range of where all integers are represented exactly by the floating 35 point type. The practical effect is to avoid switching to exponential notion. 36 37 * If the specified floatPrecision is between 0 and readablePrecisionMax, then 38 floatPrecision is used to set the significant digits following the decimal point. 39 Otherwise, it is used to set total significant digits. This does not apply to 40 really large numbers, for doubles, those larger than 2^53. Trailing zeros are 41 chopped in all cases. 42 ) 43 */ 44 auto formatNumber(T, size_t readablePrecisionMax = 6)(T num, const size_t floatPrecision = 12) 45 if (isFloatingPoint!T || isIntegral!T) 46 { 47 alias UT = Unqual!T; 48 49 import std.conv : to; 50 import std.format : format; 51 52 static if (isIntegral!T) 53 { 54 return format("%d", num); // The easy case. 55 } 56 else 57 { 58 static assert(isFloatingPoint!T); 59 60 static if (!is(UT == float) && !is(UT == double)) 61 { 62 /* Not a double or float, but a floating point. Punt on refinements. */ 63 return format("%.*g", floatPrecision, num); 64 } 65 else 66 { 67 static assert(is(UT == float) || is(UT == double)); 68 69 if (floatPrecision <= readablePrecisionMax) 70 { 71 /* Print with a fixed precision beyond the decimal point (%.*f), but 72 * remove trailing zeros. Notes: 73 * - This handles integer values stored in floating point types. 74 * - Values like NaN and infinity also handled. 75 */ 76 immutable str = format("%.*f", floatPrecision, num); 77 size_t trimToLength = str.length; 78 79 if (floatPrecision != 0 && str.length > floatPrecision + 1) 80 { 81 import std.ascii : isDigit; 82 assert(str.length - floatPrecision - 1 > 0); 83 size_t decimalIndex = str.length - floatPrecision - 1; 84 85 if (str[decimalIndex] == '.' && str[decimalIndex - 1].isDigit) 86 { 87 size_t lastNonZeroDigit = str.length - 1; 88 assert(decimalIndex < lastNonZeroDigit); 89 while (str[lastNonZeroDigit] == '0') lastNonZeroDigit--; 90 trimToLength = (decimalIndex < lastNonZeroDigit) 91 ? lastNonZeroDigit + 1 92 : decimalIndex; 93 } 94 } 95 96 return str[0 .. trimToLength]; 97 } 98 else 99 { 100 /* Determine if the number is subject to special integer value printing. 101 * Goal is to avoid exponential notion for integer values that '%.*g' 102 * generates. Numbers within the significant digit range of floatPrecision 103 * will print as desired with '%.*g', whether there is a fractional part 104 * or not. The '%.*g' format, with exponential notation, is also used for 105 * really large numbers. "Really large" being numbers outside the range 106 * of integers exactly representable by the floating point type. 107 */ 108 109 enum UT maxConsecutiveUTInteger = 2.0^^UT.mant_dig; 110 enum bool maxUTIntFitsInLong = (maxConsecutiveUTInteger <= long.max); 111 112 import std.math : fabs; 113 immutable UT absNum = num.fabs; 114 115 if (!maxUTIntFitsInLong || 116 absNum < 10.0^^floatPrecision || 117 absNum > maxConsecutiveUTInteger) 118 { 119 /* Within signficant digits range or very large. */ 120 return format("%.*g", floatPrecision, num); 121 } 122 else 123 { 124 /* Check for integral values needing to be printed in decimal format. 125 * modf/modff are used to determine if the value has a non-zero 126 * fractional component. 127 */ 128 import core.stdc.math : modf, modff; 129 130 static if (is(UT == float)) alias modfUT = modff; 131 else static if (is(UT == double)) alias modfUT = modf; 132 else static assert(0); 133 134 UT integerPart; 135 136 if (modfUT(num, &integerPart) == 0.0) return format("%d", num.to!long); 137 else return format("%.*g", floatPrecision, num); 138 } 139 } 140 } 141 } 142 assert(0); 143 } 144 145 unittest // formatNumber unit tests 146 { 147 import std.conv : to; 148 import std.format : format; 149 150 /* Integers */ 151 assert(formatNumber(0) == "0"); 152 assert(formatNumber(1) == "1"); 153 assert(formatNumber(-1) == "-1"); 154 assert(formatNumber(999) == "999"); 155 assert(formatNumber(12345678912345) == "12345678912345"); 156 assert(formatNumber(-12345678912345) == "-12345678912345"); 157 158 size_t a1 = 10; assert(a1.formatNumber == "10"); 159 const int a2 = -33234; assert(a2.formatNumber == "-33234"); 160 immutable long a3 = -12345678912345; assert(a3.formatNumber == "-12345678912345"); 161 162 // Specifying precision should never matter for integer values. 163 assert(formatNumber(0, 0) == "0"); 164 assert(formatNumber(1, 0) == "1"); 165 assert(formatNumber(-1, 0) == "-1"); 166 assert(formatNumber(999, 0) == "999"); 167 assert(formatNumber(12345678912345, 0) == "12345678912345"); 168 assert(formatNumber(-12345678912345, 0) == "-12345678912345"); 169 170 assert(formatNumber(0, 3) == "0"); 171 assert(formatNumber(1, 3) == "1"); 172 assert(formatNumber(-1, 3 ) == "-1"); 173 assert(formatNumber(999, 3) == "999"); 174 assert(formatNumber(12345678912345, 3) == "12345678912345"); 175 assert(formatNumber(-12345678912345, 3) == "-12345678912345"); 176 177 assert(formatNumber(0, 9) == "0"); 178 assert(formatNumber(1, 9) == "1"); 179 assert(formatNumber(-1, 9 ) == "-1"); 180 assert(formatNumber(999, 9) == "999"); 181 assert(formatNumber(12345678912345, 9) == "12345678912345"); 182 assert(formatNumber(-12345678912345, 9) == "-12345678912345"); 183 184 /* Doubles */ 185 assert(formatNumber(0.0) == "0"); 186 assert(formatNumber(0.2) == "0.2"); 187 assert(formatNumber(0.123412, 0) == "0"); 188 assert(formatNumber(0.123412, 1) == "0.1"); 189 assert(formatNumber(0.123412, 2) == "0.12"); 190 assert(formatNumber(0.123412, 5) == "0.12341"); 191 assert(formatNumber(0.123412, 6) == "0.123412"); 192 assert(formatNumber(0.123412, 7) == "0.123412"); 193 assert(formatNumber(9.123412, 5) == "9.12341"); 194 assert(formatNumber(9.123412, 6) == "9.123412"); 195 assert(formatNumber(99.123412, 5) == "99.12341"); 196 assert(formatNumber(99.123412, 6) == "99.123412"); 197 assert(formatNumber(99.123412, 7) == "99.12341"); 198 assert(formatNumber(999.123412, 0) == "999"); 199 assert(formatNumber(999.123412, 1) == "999.1"); 200 assert(formatNumber(999.123412, 2) == "999.12"); 201 assert(formatNumber(999.123412, 3) == "999.123"); 202 assert(formatNumber(999.123412, 4) == "999.1234"); 203 assert(formatNumber(999.123412, 5) == "999.12341"); 204 assert(formatNumber(999.123412, 6) == "999.123412"); 205 assert(formatNumber(999.123412, 7) == "999.1234"); 206 assert(formatNumber!(double, 9)(999.12341234, 7) == "999.1234123"); 207 assert(formatNumber(9001.0) == "9001"); 208 assert(formatNumber(1234567891234.0) == "1234567891234"); 209 assert(formatNumber(1234567891234.0, 0) == "1234567891234"); 210 assert(formatNumber(1234567891234.0, 1) == "1234567891234"); 211 212 /* Test round off cases. Note: These tests will not pass on Windows 32-bit builds. 213 * Use the -m64 flag on Windows to get a 64-bit build. 214 * 215 * Note: The first test case does not generate the correct result on Windows 216 * 64-bit builds. The current Windows result is included both for documentation 217 * and to provide an alert if the correct result starts getting populated. Both 218 * formatNumber and the underlying format call are included. 219 */ 220 version(Windows) 221 { 222 // Incorrect 223 assert(format("%.*f", 0, 0.6) == "0"); 224 assert(formatNumber(0.6, 0) == "0"); 225 } 226 else 227 { 228 assert(format("%.*f", 0, 0.6) == "1"); 229 assert(formatNumber(0.6, 0) == "1"); 230 } 231 232 assert(formatNumber(0.6, 1) == "0.6"); 233 assert(formatNumber(0.06, 0) == "0"); 234 assert(formatNumber(0.06, 1) == "0.1"); 235 assert(formatNumber(0.06, 2) == "0.06"); 236 assert(formatNumber(0.06, 3) == "0.06"); 237 assert(formatNumber(9.49999, 0) == "9"); 238 assert(formatNumber(9.49999, 1) == "9.5"); 239 assert(formatNumber(9.6, 0) == "10"); 240 assert(formatNumber(9.6, 1) == "9.6"); 241 assert(formatNumber(99.99, 0) == "100"); 242 assert(formatNumber(99.99, 1) == "100"); 243 assert(formatNumber(99.99, 2) == "99.99"); 244 assert(formatNumber(9999.9996, 3) == "10000"); 245 assert(formatNumber(9999.9996, 4) == "9999.9996"); 246 assert(formatNumber(99999.99996, 4) == "100000"); 247 assert(formatNumber(99999.99996, 5) == "99999.99996"); 248 assert(formatNumber(999999.999996, 5) == "1000000"); 249 assert(formatNumber(999999.999996, 6) == "999999.999996"); 250 251 /* Turn off precision, the 'human readable' style. 252 * Note: Remains o if both are zero (first test). If it becomes desirable to support 253 * turning it off when for the precision equal zero case the simple extension is to 254 * allow the 'human readable' precision template parameter to be negative. 255 */ 256 assert(formatNumber!(double, 0)(999.123412, 0) == "999"); 257 assert(formatNumber!(double, 0)(999.123412, 1) == "1e+03"); 258 assert(formatNumber!(double, 0)(999.123412, 2) == "1e+03"); 259 assert(formatNumber!(double, 0)(999.123412, 3) == "999"); 260 assert(formatNumber!(double, 0)(999.123412, 4) == "999.1"); 261 262 // Default number printing 263 assert(formatNumber(1.2) == "1.2"); 264 assert(formatNumber(12.3) == "12.3"); 265 assert(formatNumber(12.34) == "12.34"); 266 assert(formatNumber(123.45) == "123.45"); 267 assert(formatNumber(123.456) == "123.456"); 268 assert(formatNumber(1234.567) == "1234.567"); 269 assert(formatNumber(1234.5678) == "1234.5678"); 270 assert(formatNumber(12345.6789) == "12345.6789"); 271 assert(formatNumber(12345.67891) == "12345.67891"); 272 assert(formatNumber(123456.78912) == "123456.78912"); 273 assert(formatNumber(123456.789123) == "123456.789123"); 274 assert(formatNumber(1234567.891234) == "1234567.89123"); 275 assert(formatNumber(12345678.912345) == "12345678.9123"); 276 assert(formatNumber(123456789.12345) == "123456789.123"); 277 assert(formatNumber(1234567891.2345) == "1234567891.23"); 278 assert(formatNumber(12345678912.345) == "12345678912.3"); 279 assert(formatNumber(123456789123.45) == "123456789123"); 280 assert(formatNumber(1234567891234.5) == "1.23456789123e+12"); 281 assert(formatNumber(12345678912345.6) == "1.23456789123e+13"); 282 assert(formatNumber(123456789123456.0) == "123456789123456"); 283 assert(formatNumber(0.3) == "0.3"); 284 assert(formatNumber(0.03) == "0.03"); 285 assert(formatNumber(0.003) == "0.003"); 286 assert(formatNumber(0.0003) == "0.0003"); 287 assert(formatNumber(0.00003) == "3e-05" || formatNumber(0.00003) == "3e-5"); 288 assert(formatNumber(0.000003) == "3e-06" || formatNumber(0.000003) == "3e-6"); 289 assert(formatNumber(0.0000003) == "3e-07" || formatNumber(0.0000003) == "3e-7"); 290 291 // Large number inside and outside the contiguous integer representation range 292 double dlarge = 2.0^^(double.mant_dig - 2) - 10.0; 293 double dhuge = 2.0^^(double.mant_dig + 1) + 1000.0; 294 295 assert(dlarge.formatNumber == format("%d", dlarge.to!long)); 296 assert(dhuge.formatNumber!(double) == format("%.12g", dhuge)); 297 298 // Negative values - Repeat most of above tests. 299 assert(formatNumber(-0.0) == "-0" || formatNumber(-0.0) == "0"); 300 assert(formatNumber(-0.2) == "-0.2"); 301 assert(formatNumber(-0.123412, 0) == "-0"); 302 assert(formatNumber(-0.123412, 1) == "-0.1"); 303 assert(formatNumber(-0.123412, 2) == "-0.12"); 304 assert(formatNumber(-0.123412, 5) == "-0.12341"); 305 assert(formatNumber(-0.123412, 6) == "-0.123412"); 306 assert(formatNumber(-0.123412, 7) == "-0.123412"); 307 assert(formatNumber(-9.123412, 5) == "-9.12341"); 308 assert(formatNumber(-9.123412, 6) == "-9.123412"); 309 assert(formatNumber(-99.123412, 5) == "-99.12341"); 310 assert(formatNumber(-99.123412, 6) == "-99.123412"); 311 assert(formatNumber(-99.123412, 7) == "-99.12341"); 312 assert(formatNumber(-999.123412, 0) == "-999"); 313 assert(formatNumber(-999.123412, 1) == "-999.1"); 314 assert(formatNumber(-999.123412, 2) == "-999.12"); 315 assert(formatNumber(-999.123412, 3) == "-999.123"); 316 assert(formatNumber(-999.123412, 4) == "-999.1234"); 317 assert(formatNumber(-999.123412, 5) == "-999.12341"); 318 assert(formatNumber(-999.123412, 6) == "-999.123412"); 319 assert(formatNumber(-999.123412, 7) == "-999.1234"); 320 assert(formatNumber!(double, 9)(-999.12341234, 7) == "-999.1234123"); 321 assert(formatNumber(-9001.0) == "-9001"); 322 assert(formatNumber(-1234567891234.0) == "-1234567891234"); 323 assert(formatNumber(-1234567891234.0, 0) == "-1234567891234"); 324 assert(formatNumber(-1234567891234.0, 1) == "-1234567891234"); 325 326 /* Test round off cases with negative numbers. Note: These tests will not pass 327 * on Windows 32-bit builds. Use the -m64 flag on Windows to get a 64-bit build. 328 * 329 * Note: The first test case does not generate the correct result on Windows 330 * 64-bit builds. The current Windows result is included both for documentation 331 * and to provide an alert if the correct result starts getting populated. Both 332 * formatNumber and the underlying format call are included. 333 */ 334 version(Windows) 335 { 336 // Incorrect 337 assert(format("%.*f", 0, -0.6) == "-0"); 338 assert(formatNumber(-0.6, 0) == "-0"); 339 } 340 else 341 { 342 assert(format("%.*f", 0, -0.6) == "-1"); 343 assert(formatNumber(-0.6, 0) == "-1"); 344 } 345 346 assert(formatNumber(-0.6, 1) == "-0.6"); 347 assert(formatNumber(-0.06, 0) == "-0"); 348 assert(formatNumber(-0.06, 1) == "-0.1"); 349 assert(formatNumber(-0.06, 2) == "-0.06"); 350 assert(formatNumber(-0.06, 3) == "-0.06"); 351 assert(formatNumber(-9.49999, 0) == "-9"); 352 assert(formatNumber(-9.49999, 1) == "-9.5"); 353 assert(formatNumber(-9.6, 0) == "-10"); 354 assert(formatNumber(-9.6, 1) == "-9.6"); 355 assert(formatNumber(-99.99, 0) == "-100"); 356 assert(formatNumber(-99.99, 1) == "-100"); 357 assert(formatNumber(-99.99, 2) == "-99.99"); 358 assert(formatNumber(-9999.9996, 3) == "-10000"); 359 assert(formatNumber(-9999.9996, 4) == "-9999.9996"); 360 assert(formatNumber(-99999.99996, 4) == "-100000"); 361 assert(formatNumber(-99999.99996, 5) == "-99999.99996"); 362 assert(formatNumber(-999999.999996, 5) == "-1000000"); 363 assert(formatNumber(-999999.999996, 6) == "-999999.999996"); 364 365 assert(formatNumber!(double, 0)(-999.123412, 0) == "-999"); 366 assert(formatNumber!(double, 0)(-999.123412, 1) == "-1e+03"); 367 assert(formatNumber!(double, 0)(-999.123412, 2) == "-1e+03"); 368 assert(formatNumber!(double, 0)(-999.123412, 3) == "-999"); 369 assert(formatNumber!(double, 0)(-999.123412, 4) == "-999.1"); 370 371 // Default number printing 372 assert(formatNumber(-1.2) == "-1.2"); 373 assert(formatNumber(-12.3) == "-12.3"); 374 assert(formatNumber(-12.34) == "-12.34"); 375 assert(formatNumber(-123.45) == "-123.45"); 376 assert(formatNumber(-123.456) == "-123.456"); 377 assert(formatNumber(-1234.567) == "-1234.567"); 378 assert(formatNumber(-1234.5678) == "-1234.5678"); 379 assert(formatNumber(-12345.6789) == "-12345.6789"); 380 assert(formatNumber(-12345.67891) == "-12345.67891"); 381 assert(formatNumber(-123456.78912) == "-123456.78912"); 382 assert(formatNumber(-123456.789123) == "-123456.789123"); 383 assert(formatNumber(-1234567.891234) == "-1234567.89123"); 384 385 assert(formatNumber(-12345678.912345) == "-12345678.9123"); 386 assert(formatNumber(-123456789.12345) == "-123456789.123"); 387 assert(formatNumber(-1234567891.2345) == "-1234567891.23"); 388 assert(formatNumber(-12345678912.345) == "-12345678912.3"); 389 assert(formatNumber(-123456789123.45) == "-123456789123"); 390 assert(formatNumber(-1234567891234.5) == "-1.23456789123e+12"); 391 assert(formatNumber(-12345678912345.6) == "-1.23456789123e+13"); 392 assert(formatNumber(-123456789123456.0) == "-123456789123456"); 393 394 assert(formatNumber(-0.3) == "-0.3"); 395 assert(formatNumber(-0.03) == "-0.03"); 396 assert(formatNumber(-0.003) == "-0.003"); 397 assert(formatNumber(-0.0003) == "-0.0003"); 398 assert(formatNumber(-0.00003) == "-3e-05" || formatNumber(-0.00003) == "-3e-5"); 399 assert(formatNumber(-0.000003) == "-3e-06" || formatNumber(-0.000003) == "-3e-6"); 400 assert(formatNumber(-0.0000003) == "-3e-07" || formatNumber(-0.0000003) == "-3e-7"); 401 402 const double dlargeNeg = -2.0^^(double.mant_dig - 2) + 10.0; 403 immutable double dhugeNeg = -2.0^^(double.mant_dig + 1) - 1000.0; 404 405 assert(dlargeNeg.formatNumber == format("%d", dlargeNeg.to!long)); 406 assert(dhugeNeg.formatNumber!(double) == format("%.12g", dhugeNeg)); 407 408 // Type qualifiers 409 const double b1 = 0.0; assert(formatNumber(b1) == "0"); 410 const double b2 = 0.2; assert(formatNumber(b2) == "0.2"); 411 const double b3 = 0.123412; assert(formatNumber(b3, 2) == "0.12"); 412 immutable double b4 = 99.123412; assert(formatNumber(b4, 5) == "99.12341"); 413 immutable double b5 = 99.123412; assert(formatNumber(b5, 7) == "99.12341"); 414 415 // Special values 416 assert(formatNumber(double.nan) == "nan"); 417 assert(formatNumber(double.nan, 0) == "nan"); 418 assert(formatNumber(double.nan, 1) == "nan"); 419 assert(formatNumber(double.nan, 9) == "nan"); 420 assert(formatNumber(double.infinity) == "inf"); 421 assert(formatNumber(double.infinity, 0) == "inf"); 422 assert(formatNumber(double.infinity, 1) == "inf"); 423 assert(formatNumber(double.infinity, 9) == "inf"); 424 425 // Float. Mix negative and type qualifiers in. 426 assert(formatNumber(0.0f) == "0"); 427 assert(formatNumber(0.5f) == "0.5"); 428 assert(formatNumber(0.123412f, 0) == "0"); 429 assert(formatNumber(0.123412f, 1) == "0.1"); 430 assert(formatNumber(-0.123412f, 2) == "-0.12"); 431 assert(formatNumber(9.123412f, 5) == "9.12341"); 432 assert(formatNumber(9.123412f, 6) == "9.123412"); 433 assert(formatNumber(-99.123412f, 5) == "-99.12341"); 434 assert(formatNumber(99.123412f, 7) == "99.12341"); 435 assert(formatNumber(-999.123412f, 5) == "-999.12341"); 436 437 float c1 = 999.123412f; assert(formatNumber(c1, 7) == "999.1234"); 438 float c2 = 999.1234f; assert(formatNumber!(float, 9)(c2, 3) == "999.123"); 439 const float c3 = 9001.0f; assert(formatNumber(c3) == "9001"); 440 const float c4 = -12345678.0f; assert(formatNumber(c4) == "-12345678"); 441 immutable float c5 = 12345678.0f; assert(formatNumber(c5, 0) == "12345678"); 442 immutable float c6 = 12345678.0f; assert(formatNumber(c6, 1) == "12345678"); 443 444 float flarge = 2.0^^(float.mant_dig - 2) - 10.0; 445 float fhuge = 2.0^^(float.mant_dig + 1) + 1000.0; 446 447 assert(flarge.formatNumber == format("%d", flarge.to!long)); 448 assert(fhuge.formatNumber!(float, 12) == format("%.12g", fhuge)); 449 450 // Reals - No special formatting 451 real d1 = 2.0^^(double.mant_dig) - 1000.0; assert(formatNumber(d1) == format("%.12g", d1)); 452 real d2 = 123456789.12341234L; assert(formatNumber(d2, 12) == format("%.12g", d2)); 453 } 454 455 /** 456 rangeMedian. Finds the median. Modifies the range via topN or sort in the process. 457 458 Note: topN is the preferred algorithm, but the version prior to Phobos 2.073 459 is pathologically slow on certain data sets. Use topN in 2.073 and later, 460 sort in earlier versions. 461 462 See: https://issues.dlang.org/show_bug.cgi?id=16517 463 https://github.com/dlang/phobos/pull/4815 464 http://forum.dlang.org/post/ujuugklmbibuheptdwcn@forum.dlang.org 465 */ 466 static if (__VERSION__ >= 2073) 467 { 468 version = rangeMedianViaTopN; 469 } 470 else 471 { 472 version = rangeMedianViaSort; 473 } 474 475 auto rangeMedian (Range) (Range r) 476 if (isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range) 477 { 478 version(rangeMedianViaSort) 479 { 480 version(rangeMedianViaTopN) 481 { 482 assert(0, "Both rangeMedianViaSort and rangeMedianViaTopN assigned as versions. Assign only one."); 483 } 484 } 485 else version(rangeMedianViaTopN) 486 { 487 } 488 else 489 { 490 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned."); 491 } 492 493 import std.traits : isFloatingPoint; 494 495 ElementType!Range median; 496 497 if (r.length > 0) 498 { 499 size_t medianIndex = r.length / 2; 500 501 version(rangeMedianViaSort) 502 { 503 import std.algorithm : sort; 504 sort(r); 505 median = r[medianIndex]; 506 507 static if (isFloatingPoint!(ElementType!Range)) 508 { 509 if (r.length % 2 == 0) 510 { 511 /* Even number of values. Split the difference. */ 512 median = (median + r[medianIndex - 1]) / 2.0; 513 } 514 } 515 } 516 else version(rangeMedianViaTopN) 517 { 518 import std.algorithm : maxElement, topN; 519 topN(r, medianIndex); 520 median = r[medianIndex]; 521 522 static if (isFloatingPoint!(ElementType!Range)) 523 { 524 if (r.length % 2 == 0) 525 { 526 /* Even number of values. Split the difference. */ 527 if (r[medianIndex - 1] < median) 528 { 529 median = (median + r[0..medianIndex].maxElement) / 2.0; 530 } 531 } 532 } 533 } 534 else 535 { 536 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned."); 537 } 538 } 539 540 return median; 541 } 542 543 /* rangeMedian unit tests. */ 544 @safe unittest 545 { 546 import std.math : isNaN; 547 import std.algorithm : all, permutations; 548 549 // Median of empty range is (type).init. Zero for int, nan for floats/doubles 550 assert(rangeMedian(new int[0]) == int.init); 551 assert(rangeMedian(new double[0]).isNaN && double.init.isNaN); 552 assert(rangeMedian(new string[0]) == ""); 553 554 assert(rangeMedian([3]) == 3); 555 assert(rangeMedian([3.0]) == 3.0); 556 assert(rangeMedian([3.5]) == 3.5); 557 assert(rangeMedian(["aaa"]) == "aaa"); 558 559 /* Even number of elements: Split the difference for floating point, but not other types. */ 560 assert(rangeMedian([3, 4]) == 4); 561 assert(rangeMedian([3.0, 4.0]) == 3.5); 562 563 assert(rangeMedian([3, 6, 12]) == 6); 564 assert(rangeMedian([3.0, 6.5, 12.5]) == 6.5); 565 566 // Do the rest with permutations 567 assert([4, 7].permutations.all!(x => (x.rangeMedian == 7))); 568 assert([4.0, 7.0].permutations.all!(x => (x.rangeMedian == 5.5))); 569 assert(["aaa", "bbb"].permutations.all!(x => (x.rangeMedian == "bbb"))); 570 571 assert([4, 7, 19].permutations.all!(x => (x.rangeMedian == 7))); 572 assert([4.5, 7.5, 19.5].permutations.all!(x => (x.rangeMedian == 7.5))); 573 assert(["aaa", "bbb", "ccc"].permutations.all!(x => (x.rangeMedian == "bbb"))); 574 575 assert([4.5, 7.5, 19.5, 21.0].permutations.all!(x => (x.rangeMedian == 13.5))); 576 assert([4.5, 7.5, 19.5, 20.5, 36.0].permutations.all!(x => (x.rangeMedian == 19.5))); 577 assert([4.5, 7.5, 19.5, 24.0, 24.5, 25.0].permutations.all!(x => (x.rangeMedian == 21.75))); 578 assert([1.5, 3.25, 3.55, 4.5, 24.5, 25.0, 25.6].permutations.all!(x => (x.rangeMedian == 4.5))); 579 } 580 581 /// Quantiles 582 583 /** The different quantile interpolation methods. 584 * See: https://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html 585 */ 586 enum QuantileInterpolation 587 { 588 R1 = 1, /// R quantile type 1 589 R2 = 2, /// R quantile type 2 590 R3 = 3, /// R quantile type 3 591 R4 = 4, /// R quantile type 4 592 R5 = 5, /// R quantile type 5 593 R6 = 6, /// R quantile type 6 594 R7 = 7, /// R quantile type 7 595 R8 = 8, /// R quantile type 8 596 R9 = 9, /// R quantile type 9 597 } 598 599 600 import std.traits : isFloatingPoint, isNumeric, Unqual; 601 import std.range; 602 603 /** 604 Returns the quantile in a data vector for a cumulative probability. 605 606 Takes a data vector and a probability and returns the quantile cut point for the 607 probability. The vector must be sorted and the probability in the range [0.0, 1.0]. 608 The interpolation methods available are the same as in R and available in a number 609 of statistical packages. See the R documentation or wikipedia for details 610 (https://en.wikipedia.org/wiki/Quantile). 611 612 Examples: 613 ---- 614 double data = [22, 57, 73, 97, 113]; 615 double median = quantile(0.5, data); // 73 616 auto q1 = [0.25, 0.5, 0.75].map!(p => p.quantile(data)); // 57, 73, 97 617 auto q2 = [0.25, 0.5, 0.75].map!(p => p.quantile(data), QuantileInterpolation.R8); //45.3333, 73, 102.333 618 ---- 619 */ 620 double quantile(ProbType, Range) 621 (const ProbType prob, Range data, QuantileInterpolation method = QuantileInterpolation.R7) 622 if (isRandomAccessRange!Range && hasLength!Range && isNumeric!(ElementType!Range) && 623 isFloatingPoint!ProbType) 624 in 625 { 626 import std.algorithm : isSorted; 627 assert(0.0 <= prob && prob <= 1.0); 628 assert(method >= QuantileInterpolation.min && method <= QuantileInterpolation.max); 629 assert(data.isSorted); 630 } 631 do 632 { 633 import core.stdc.math : modf; 634 import std.algorithm : max, min; 635 import std.conv : to; 636 import std.math : ceil, lrint; 637 638 /* Note: In the implementation below, 'h1' is the 1-based index into the data vector. 639 * This follows the wikipedia notation for the interpolation methods. One will be 640 * subtracted before the vector is accessed. 641 */ 642 643 double q = double.nan; // The return value. 644 645 if (data.length == 1) q = data[0].to!double; 646 else if (data.length > 1) 647 { 648 if (method == QuantileInterpolation.R1) 649 { 650 q = data[((data.length * prob).ceil - 1.0).to!long.max(0).to!size_t].to!double; 651 } 652 else if (method == QuantileInterpolation.R2) 653 { 654 immutable double h1 = data.length * prob + 0.5; 655 immutable size_t lo = ((h1 - 0.5).ceil.to!long - 1).max(0).to!size_t; 656 immutable size_t hi = ((h1 + 0.5).to!size_t - 1).min(data.length - 1); 657 q = (data[lo].to!double + data[hi].to!double) / 2.0; 658 } 659 else if (method == QuantileInterpolation.R3) 660 { 661 /* Implementation notes: 662 * - R3 uses 'banker's rounding', where 0.5 is rounded to the nearest even 663 * value. The 'lrint' routine does this. 664 * - DMD will sometimes choose the incorrect 0.5 rounding if the calculation 665 * is done as a single step. The separate calculation of 'h1' avoids this. 666 */ 667 immutable double h1 = data.length * prob; 668 q = data[h1.lrint.max(1).to!size_t - 1].to!double; 669 } 670 else if ((method == QuantileInterpolation.R4) || 671 (method == QuantileInterpolation.R5) || 672 (method == QuantileInterpolation.R6) || 673 (method == QuantileInterpolation.R7) || 674 (method == QuantileInterpolation.R8) || 675 (method == QuantileInterpolation.R9)) 676 { 677 /* Methods 4-9 have different formulas for generating the real-valued index, 678 * but work the same after that, choosing the final value by linear interpolation. 679 */ 680 double h1; 681 switch (method) 682 { 683 case QuantileInterpolation.R4: h1 = data.length * prob; break; 684 case QuantileInterpolation.R5: h1 = data.length * prob + 0.5; break; 685 case QuantileInterpolation.R6: h1 = (data.length + 1) * prob; break; 686 case QuantileInterpolation.R7: h1 = (data.length - 1) * prob + 1.0; break; 687 case QuantileInterpolation.R8: h1 = (data.length.to!double + 1.0/3.0) * prob + 1.0/3.0; break; 688 case QuantileInterpolation.R9: h1 = (data.length + 0.25) * prob + 3.0/8.0; break; 689 default: assert(0); 690 } 691 692 double h1IntegerPart; 693 immutable double h1FractionPart = modf(h1, &h1IntegerPart); 694 immutable size_t lo = (h1IntegerPart - 1.0).to!long.max(0).min(data.length - 1).to!size_t; 695 q = data[lo]; 696 if (h1FractionPart > 0.0) 697 { 698 immutable size_t hi = h1IntegerPart.to!long.min(data.length - 1).to!size_t; 699 q += h1FractionPart * (data[hi].to!double - data[lo].to!double); 700 } 701 } 702 else assert(0); 703 } 704 return q; 705 } 706 707 unittest 708 { 709 import std.algorithm : equal, map; 710 import std.array : array; 711 import std.traits : EnumMembers; 712 713 /* A couple simple tests. */ 714 assert(quantile(0.5, [22, 57, 73, 97, 113]) == 73); 715 assert(quantile(0.5, [22.5, 57.5, 73.5, 97.5, 113.5]) == 73.5); 716 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113])).array == [57.0, 73.0, 97.0]); 717 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113], QuantileInterpolation.R1)).array == [57.0, 73.0, 97.0]); 718 719 /* Data arrays. */ 720 double[] d1 = []; 721 double[] d2 = [5.5]; 722 double[] d3 = [0.0, 1.0]; 723 double[] d4 = [-1.0, 1.0]; 724 double[] d5 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]; 725 double[] d6 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]; 726 double[] d7 = [ 31.79, 64.19, 81.77]; 727 double[] d8 = [-94.43, -74.55, -50.81, 27.45, 78.79]; 728 double[] d9 = [-89.17, 20.93, 38.51, 48.03, 76.43, 77.02]; 729 double[] d10 = [-99.53, -76.87, -76.69, -67.81, -40.26, -11.29, 21.02]; 730 double[] d11 = [-78.32, -52.22, -50.86, 13.45, 15.96, 17.25, 46.35, 85.00]; 731 double[] d12 = [-81.36, -70.87, -53.56, -42.14, -9.18, 7.23, 49.52, 80.43, 98.50]; 732 double[] d13 = [ 38.37, 44.36, 45.70, 50.69, 51.36, 55.66, 56.91, 58.95, 62.01, 65.25]; 733 734 /* Spot check a few other data types. Same expected outputs.*/ 735 int[] d3Int = [0, 1]; 736 int[] d4Int = [-1, 1]; 737 int[] d5Int = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 738 size_t[] d6Size_t = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; 739 float[] d7Float = [ 31.79f, 64.19f, 81.77f]; 740 float[] d8Float = [-94.43f, -74.55f, -50.81f, 27.45f, 78.79f]; 741 float[] d9Float = [-89.17f, 20.93f, 38.51f, 48.03f, 76.43f, 77.02f]; 742 float[] d10Float = [-99.53f, -76.87f, -76.69f, -67.81f, -40.26f, -11.29f, 21.02f]; 743 744 /* Probability values. */ 745 double[] probs = [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0]; 746 747 /* Expected values for each data array, for 'probs'. One expected result for each of the nine methods. 748 * The expected values were generated by R and Octave. 749 */ 750 double[13][9] d1_expected; // All values double.nan, the default. 751 double[13][9] d2_expected = [ 752 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 753 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 754 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 755 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 756 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 757 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 758 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 759 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 760 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], 761 ]; 762 double[13][9] d3_expected = [ 763 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 764 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 765 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0], 766 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0], 767 [0.0, 0.0, 0.0, 0.0, 0.3, 0.48, 0.5, 0.52, 1.0, 1.0, 1.0, 1.0, 1.0], 768 [0.0, 0.0, 0.0, 0.0, 0.2, 0.47, 0.5, 0.53, 1.0, 1.0, 1.0, 1.0, 1.0], 769 [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0], 770 [0.0, 0.0, 0.0, 0.0, 0.2666667, 0.4766667, 0.5, 0.5233333, 1.0, 1.0, 1.0, 1.0, 1.0], 771 [0.0, 0.0, 0.0, 0.0, 0.275, 0.4775, 0.5, 0.5225, 1.0, 1.0, 1.0, 1.0, 1.0], 772 ]; 773 double[13][9] d4_expected = [ 774 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 775 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 776 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 777 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -0.96, 0.0, 0.6, 0.8, 0.92, 1.0], 778 [-1.0, -1.0, -1.0, -1.0, -0.4, -0.04, 0.0, 0.04, 1.0, 1.0, 1.0, 1.0, 1.0], 779 [-1.0, -1.0, -1.0, -1.0, -0.6, -0.06, 0.0, 0.06, 1.0, 1.0, 1.0, 1.0, 1.0], 780 [-1.0, -0.9, -0.8, -0.5, -0.2, -0.02, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0], 781 [-1.0, -1.0, -1.0, -1.0, -0.4666667, -0.04666667, -4.440892e-16, 0.04666667, 1.0, 1.0, 1.0, 1.0, 1.0], 782 [-1.0, -1.0, -1.0, -1.0, -0.45, -0.045, 0.0, 0.045, 1.0, 1.0, 1.0, 1.0, 1.0], 783 ]; 784 double[13][9] d5_expected = [ 785 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0], 786 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0], 787 [0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 5.0, 7.0, 9.0, 9.0, 10.0, 10.0], 788 [0.0, 0.0, 0.1, 1.75, 3.4, 4.39, 4.5, 4.61, 7.25, 8.9, 9.45, 9.78, 10.0], 789 [0.0, 0.05, 0.6, 2.25, 3.9, 4.89, 5.0, 5.11, 7.75, 9.4, 9.95, 10.0, 10.0], 790 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.0, 10.0, 10.0], 791 [0.0, 0.5, 1.0, 2.5, 4.0, 4.9, 5.0, 5.1, 7.5, 9.0, 9.5, 9.8, 10.0], 792 [0.0, 0.0, 0.4666667, 2.166667, 3.866667, 4.886667, 5.0, 5.113333, 7.833333, 9.533333, 10.0, 10.0, 10.0], 793 [0.0, 0.0, 0.5, 2.1875, 3.875, 4.8875, 5.0, 5.1125, 7.8125, 9.5, 10.0, 10.0, 10.0], 794 ]; 795 double[13][9] d6_expected = [ 796 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 6.0, 8.0, 10.0, 11.0, 11.0, 11.0], 797 [0.0, 0.0, 1.0, 2.5, 4.0, 5.0, 5.5, 6.0, 8.5, 10.0, 11.0, 11.0, 11.0], 798 [0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 10.0, 10.0, 11.0, 11.0], 799 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.4, 10.76, 11.0], 800 [0.0, 0.1, 0.7, 2.5, 4.3, 5.38, 5.5, 5.62, 8.5, 10.3, 10.9, 11.0, 11.0], 801 [0.0, 0.0, 0.3, 2.25, 4.2, 5.37, 5.5, 5.63, 8.75, 10.7, 11.0, 11.0, 11.0], 802 [0.0, 0.55, 1.1, 2.75, 4.4, 5.39, 5.5, 5.61, 8.25, 9.9, 10.45, 10.78, 11.0], 803 [0.0, 0.0, 0.5666667, 2.416667, 4.266667, 5.376667, 5.5, 5.623333, 8.583333, 10.43333, 11.0, 11.0, 11.0], 804 [0.0, 0.0, 0.6, 2.4375, 4.275, 5.3775, 5.5, 5.6225, 8.5625, 10.4, 11.0, 11.0, 11.0], 805 ]; 806 double[13][9] d7_expected = [ 807 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77], 808 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77], 809 [31.79, 31.79, 31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77], 810 [31.79, 31.79, 31.79, 31.79, 38.27, 47.018, 47.99, 48.962, 68.585, 76.496, 79.133, 80.7152, 81.77], 811 [31.79, 31.79, 31.79, 39.89, 54.47, 63.218, 64.19, 64.7174, 77.375, 81.77, 81.77, 81.77, 81.77], 812 [31.79, 31.79, 31.79, 31.79, 51.23, 62.894, 64.19, 64.8932, 81.77, 81.77, 81.77, 81.77, 81.77], 813 [31.79, 35.03, 38.27, 47.99, 57.71, 63.542, 64.19, 64.5416, 72.98, 78.254, 80.012, 81.0668, 81.77], 814 [31.79, 31.79, 31.79, 37.19, 53.39, 63.11, 64.19, 64.776, 78.84, 81.77, 81.77, 81.77, 81.77], 815 [31.79, 31.79, 31.79, 37.865, 53.66, 63.137, 64.19, 64.76135, 78.47375, 81.77, 81.77, 81.77, 81.77], 816 ]; 817 double[13][9] d8_expected = [ 818 [-94.43, -94.43, -94.43, -74.55, -74.55, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79], 819 [-94.43, -94.43, -94.43, -74.55, -62.68, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79], 820 [-94.43, -94.43, -94.43, -94.43, -74.55, -74.55, -74.55, -50.81, 27.45, 27.45, 78.79, 78.79, 78.79], 821 [-94.43, -94.43, -94.43, -89.46, -74.55, -63.867, -62.68, -61.493, 7.885, 53.12, 65.955, 73.656, 78.79], 822 [-94.43, -94.43, -94.43, -79.52, -62.68, -51.997, -50.81, -46.897, 40.285, 78.79, 78.79, 78.79, 78.79], 823 [-94.43, -94.43, -94.43, -84.49, -65.054, -52.2344, -50.81, -46.1144, 53.12, 78.79, 78.79, 78.79, 78.79], 824 [-94.43, -90.454, -86.478, -74.55, -60.306, -51.7596, -50.81, -47.6796, 27.45, 58.254, 68.522, 74.6828, 78.79], 825 [-94.43, -94.43, -94.43, -81.17667, -63.47133, -52.07613, -50.81, -46.63613, 44.56333, 78.79, 78.79, 78.79, 78.79], 826 [-94.43, -94.43, -94.43, -80.7625, -63.2735, -52.05635, -50.81, -46.70135, 43.49375, 78.79, 78.79, 78.79, 78.79], 827 ]; 828 double[13][9] d9_expected = [ 829 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02], 830 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 43.27, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02], 831 [-89.17, -89.17, -89.17, 20.93, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02], 832 [-89.17, -89.17, -89.17, -34.12, 27.962, 37.4552, 38.51, 39.0812, 62.23, 76.666, 76.843, 76.9492, 77.02], 833 [-89.17, -89.17, -78.16, 20.93, 36.752, 42.6988, 43.27, 43.8412, 76.43, 76.961, 77.02, 77.02, 77.02], 834 [-89.17, -89.17, -89.17, -6.595, 34.994, 42.6036, 43.27, 43.9364, 76.5775, 77.02, 77.02, 77.02, 77.02], 835 [-89.17, -61.645, -34.12, 25.325, 38.51, 42.794, 43.27, 43.746, 69.33, 76.725, 76.8725, 76.961, 77.02], 836 [-89.17, -89.17, -89.17, 11.755, 36.166, 42.66707, 43.27, 43.87293, 76.47917, 77.02, 77.02, 77.02, 77.02], 837 [-89.17, -89.17, -89.17, 14.04875, 36.3125, 42.675, 43.27, 43.865, 76.46688, 77.02, 77.02, 77.02, 77.02], 838 ]; 839 double[13][9] d10_expected = [ 840 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02], 841 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02], 842 [-99.53, -99.53, -99.53, -76.87, -76.69, -76.69, -67.81, -67.81, -40.26, -11.29, 21.02, 21.02, 21.02], 843 [-99.53, -99.53, -99.53, -82.535, -76.726, -72.8716, -72.25, -71.6284, -33.0175, -1.597, 9.7115, 16.4966, 21.02], 844 [-99.53, -99.53, -94.998, -76.825, -74.026, -68.4316, -67.81, -65.8815, -18.5325, 14.558, 21.02, 21.02, 21.02], 845 [-99.53, -99.53, -99.53, -76.87, -74.914, -68.5204, -67.81, -65.606, -11.29, 21.02, 21.02, 21.02, 21.02], 846 [-99.53, -92.732, -85.934, -76.78, -73.138, -68.3428, -67.81, -66.157, -25.775, 1.634, 11.327, 17.1428, 21.02], 847 [-99.53, -99.53, -98.01933, -76.84, -74.322, -68.4612, -67.81, -65.78967, -16.11833, 18.866, 21.02, 21.02, 21.02], 848 [-99.53, -99.53, -97.264, -76.83625, -74.248, -68.4538, -67.81, -65.81263, -16.72187, 17.789, 21.02, 21.02, 21.02], 849 ]; 850 double[13][9] d11_expected = [ 851 [-78.32, -78.32, -78.32, -52.22, 13.45, 13.45, 13.45, 15.96, 17.25, 85.0, 85.0, 85.0, 85.0], 852 [-78.32, -78.32, -78.32, -51.54, 13.45, 13.45, 14.705, 15.96, 31.8, 85.0, 85.0, 85.0, 85.0], 853 [-78.32, -78.32, -78.32, -52.22, -50.86, 13.45, 13.45, 13.45, 17.25, 46.35, 85.0, 85.0, 85.0], 854 [-78.32, -78.32, -78.32, -52.22, -37.998, 8.3052, 13.45, 13.6508, 17.25, 54.08, 69.54, 78.816, 85.0], 855 [-78.32, -78.32, -70.49, -51.54, -5.843, 14.5042, 14.705, 14.9058, 31.8, 73.405, 85.0, 85.0, 85.0], 856 [-78.32, -78.32, -78.32, -51.88, -12.274, 14.4791, 14.705, 14.9309, 39.075, 85.0, 85.0, 85.0, 85.0], 857 [-78.32, -69.185, -60.05, -51.2, 0.588, 14.5293, 14.705, 14.8807, 24.525, 57.945, 71.4725, 79.589, 85.0], 858 [-78.32, -78.32, -73.97, -51.65333, -7.986667, 14.49583, 14.705, 14.91417, 34.225, 78.55833, 85.0, 85.0, 85.0], 859 [-78.32, -78.32, -73.1, -51.625, -7.45075, 14.49792, 14.705, 14.91208, 33.61875, 77.27, 85.0, 85.0, 85.0], 860 ]; 861 double[13][9] d12_expected = [ 862 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5], 863 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5], 864 [-81.36, -81.36, -81.36, -70.87, -42.14, -42.14, -42.14, -9.18, 49.52, 80.43, 98.5, 98.5, 98.5], 865 [-81.36, -81.36, -81.36, -66.5425, -46.708, -28.6264, -25.66, -22.6936, 38.9475, 82.237, 90.3685, 95.2474, 98.5], 866 [-81.36, -81.36, -77.164, -57.8875, -38.844, -12.1464, -9.18, -7.7031, 57.2475, 91.272, 98.5, 98.5, 98.5], 867 [-81.36, -81.36, -81.36, -62.215, -42.14, -12.476, -9.18, -7.539, 64.975, 98.5, 98.5, 98.5, 98.5], 868 [-81.36, -77.164, -72.968, -53.56, -35.548, -11.8168, -9.18, -7.8672, 49.52, 84.044, 91.272, 95.6088, 98.5], 869 [-81.36, -81.36, -78.56267, -59.33, -39.94267, -12.25627, -9.18, -7.6484, 59.82333, 93.68133, 98.5, 98.5, 98.5], 870 [-81.36, -81.36, -78.213, -58.96938, -39.668, -12.2288, -9.18, -7.662075, 59.17938, 93.079, 98.5, 98.5, 98.5], 871 ]; 872 double[13][9] d13_expected = [ 873 [38.37, 38.37, 38.37, 45.7, 50.69, 51.36, 51.36, 55.66, 58.95, 62.01, 65.25, 65.25, 65.25], 874 [38.37, 38.37, 41.365, 45.7, 51.025, 51.36, 53.51, 55.66, 58.95, 63.63, 65.25, 65.25, 65.25], 875 [38.37, 38.37, 38.37, 44.36, 50.69, 51.36, 51.36, 51.36, 58.95, 62.01, 65.25, 65.25, 65.25], 876 [38.37, 38.37, 38.37, 45.03, 50.69, 51.293, 51.36, 51.79, 57.93, 62.01, 63.63, 64.602, 65.25], 877 [38.37, 38.37, 41.365, 45.7, 51.025, 53.08, 53.51, 53.94, 58.95, 63.63, 65.25, 65.25, 65.25], 878 [38.37, 38.37, 38.969, 45.365, 50.958, 53.037, 53.51, 53.983, 59.715, 64.926, 65.25, 65.25, 65.25], 879 [38.37, 41.0655, 43.761, 46.9475, 51.092, 53.123, 53.51, 53.897, 58.44, 62.334, 63.792, 64.6668, 65.25], 880 [38.37, 38.37, 40.56633, 45.58833, 51.00267, 53.06567, 53.51, 53.95433, 59.205, 64.062, 65.25, 65.25, 65.25], 881 [38.37, 38.37, 40.766, 45.61625, 51.00825, 53.06925, 53.51, 53.95075, 59.14125, 63.954, 65.25, 65.25, 65.25], 882 ]; 883 884 void compareResults(const double[] actual, const double[] expected, string dataset, QuantileInterpolation method) 885 { 886 import std.conv : to; 887 import std.format : format; 888 import std.math : approxEqual, isNaN; 889 import std.range : lockstep; 890 891 foreach (i, actualValue, expectedValue; lockstep(actual, expected)) 892 { 893 assert(actualValue.approxEqual(expectedValue) || (actualValue.isNaN && expectedValue.isNaN), 894 format("Quantile unit test failure, dataset %s, method: %s, index: %d, expected: %g, actual: %g", 895 dataset, method.to!string, i, expectedValue, actualValue)); 896 } 897 } 898 899 foreach(methodIndex, method; EnumMembers!QuantileInterpolation) 900 { 901 compareResults(probs.map!(p => p.quantile(d1, method)).array, d1_expected[methodIndex], "d1", method); 902 compareResults(probs.map!(p => p.quantile(d2, method)).array, d2_expected[methodIndex], "d2", method); 903 compareResults(probs.map!(p => p.quantile(d3, method)).array, d3_expected[methodIndex], "d3", method); 904 compareResults(probs.map!(p => p.quantile(d3Int, method)).array, d3_expected[methodIndex], "d3Int", method); 905 compareResults(probs.map!(p => p.quantile(d4, method)).array, d4_expected[methodIndex], "d4", method); 906 compareResults(probs.map!(p => p.quantile(d4Int, method)).array, d4_expected[methodIndex], "d4Int", method); 907 compareResults(probs.map!(p => p.quantile(d5, method)).array, d5_expected[methodIndex], "d5", method); 908 compareResults(probs.map!(p => p.quantile(d5Int, method)).array, d5_expected[methodIndex], "d5Int", method); 909 compareResults(probs.map!(p => p.quantile(d6, method)).array, d6_expected[methodIndex], "d6", method); 910 compareResults(probs.map!(p => p.quantile(d6Size_t, method)).array, d6_expected[methodIndex], "d6Size_t", method); 911 compareResults(probs.map!(p => p.quantile(d7, method)).array, d7_expected[methodIndex], "d7", method); 912 compareResults(probs.map!(p => p.quantile(d7Float, method)).array, d7_expected[methodIndex], "d7Float", method); 913 compareResults(probs.map!(p => p.quantile(d8, method)).array, d8_expected[methodIndex], "d8", method); 914 compareResults(probs.map!(p => p.quantile(d8Float, method)).array, d8_expected[methodIndex], "d8Float", method); 915 compareResults(probs.map!(p => p.quantile(d9, method)).array, d9_expected[methodIndex], "d9", method); 916 compareResults(probs.map!(p => p.quantile(d9Float, method)).array, d9_expected[methodIndex], "d9Float", method); 917 compareResults(probs.map!(p => p.quantile(d10, method)).array, d10_expected[methodIndex], "d10", method); 918 compareResults(probs.map!(p => p.quantile(d10Float, method)).array, d10_expected[methodIndex], "d10Float", method); 919 compareResults(probs.map!(p => p.quantile(d11, method)).array, d11_expected[methodIndex], "d11", method); 920 compareResults(probs.map!(p => p.quantile(d12, method)).array, d12_expected[methodIndex], "d12", method); 921 compareResults(probs.map!(p => p.quantile(d13, method)).array, d13_expected[methodIndex], "d13", method); 922 } 923 }