1 /**
2 Numeric related utilities used by TSV Utilities.
3
4 Utilities in this file:
5 $(LIST
6 * [formatNumber] - An alternate print format for numbers, especially useful when
7 doubles are being used to represent integer and float values.
8
9 * [rangeMedian] - Finds the median value of a range.
10
11 * [quantile] - Generates quantile values for a data set.
12 )
13
14 Copyright (c) 2016-2021, eBay Inc.
15 Initially written by Jon Degenhardt
16
17 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
18 */
19
20 module tsv_utils.common.numerics;
21
22 import std.traits : isFloatingPoint, isIntegral, Unqual;
23
24 /**
25 formatNumber is an alternate way to print numbers. It is especially useful when
26 representing both integral and floating point values with float point data types.
27
28 formatNumber was created for tsv-summarize, where all calculations are done as doubles,
29 but may be integers by nature. In addition, output may be either for human consumption
30 or for additional machine processing. Integers are printed normally. Floating point is
31 printed as follows:
32 $(LIST
33 * Values that are exact integral values are printed as integers, as long as they
34 are within the range of where all integers are represented exactly by the floating
35 point type. The practical effect is to avoid switching to exponential notion.
36
37 * If the specified floatPrecision is between 0 and readablePrecisionMax, then
38 floatPrecision is used to set the significant digits following the decimal point.
39 Otherwise, it is used to set total significant digits. This does not apply to
40 really large numbers, for doubles, those larger than 2^53. Trailing zeros are
41 chopped in all cases.
42 )
43 */
44 auto formatNumber(T, size_t readablePrecisionMax = 6)(T num, const size_t floatPrecision = 12)
45 if (isFloatingPoint!T || isIntegral!T)
46 {
47 alias UT = Unqual!T;
48
49 import std.conv : to;
50 import std.format : format;
51
52 static if (isIntegral!T)
53 {
54 return format("%d", num); // The easy case.
55 }
56 else
57 {
58 static assert(isFloatingPoint!T);
59
60 static if (!is(UT == float) && !is(UT == double))
61 {
62 /* Not a double or float, but a floating point. Punt on refinements. */
63 return format("%.*g", floatPrecision, num);
64 }
65 else
66 {
67 static assert(is(UT == float) || is(UT == double));
68
69 if (floatPrecision <= readablePrecisionMax)
70 {
71 /* Print with a fixed precision beyond the decimal point (%.*f), but
72 * remove trailing zeros. Notes:
73 * - This handles integer values stored in floating point types.
74 * - Values like NaN and infinity also handled.
75 */
76 immutable str = format("%.*f", floatPrecision, num);
77 size_t trimToLength = str.length;
78
79 if (floatPrecision != 0 && str.length > floatPrecision + 1)
80 {
81 import std.ascii : isDigit;
82 assert(str.length - floatPrecision - 1 > 0);
83 size_t decimalIndex = str.length - floatPrecision - 1;
84
85 if (str[decimalIndex] == '.' && str[decimalIndex - 1].isDigit)
86 {
87 size_t lastNonZeroDigit = str.length - 1;
88 assert(decimalIndex < lastNonZeroDigit);
89 while (str[lastNonZeroDigit] == '0') lastNonZeroDigit--;
90 trimToLength = (decimalIndex < lastNonZeroDigit)
91 ? lastNonZeroDigit + 1
92 : decimalIndex;
93 }
94 }
95
96 return str[0 .. trimToLength];
97 }
98 else
99 {
100 /* Determine if the number is subject to special integer value printing.
101 * Goal is to avoid exponential notion for integer values that '%.*g'
102 * generates. Numbers within the significant digit range of floatPrecision
103 * will print as desired with '%.*g', whether there is a fractional part
104 * or not. The '%.*g' format, with exponential notation, is also used for
105 * really large numbers. "Really large" being numbers outside the range
106 * of integers exactly representable by the floating point type.
107 */
108
109 enum UT maxConsecutiveUTInteger = 2.0^^UT.mant_dig;
110 enum bool maxUTIntFitsInLong = (maxConsecutiveUTInteger <= long.max);
111
112 import std.math : fabs;
113 immutable UT absNum = num.fabs;
114
115 if (!maxUTIntFitsInLong ||
116 absNum < 10.0^^floatPrecision ||
117 absNum > maxConsecutiveUTInteger)
118 {
119 /* Within signficant digits range or very large. */
120 return format("%.*g", floatPrecision, num);
121 }
122 else
123 {
124 /* Check for integral values needing to be printed in decimal format.
125 * modf/modff are used to determine if the value has a non-zero
126 * fractional component.
127 */
128 import core.stdc.math : modf, modff;
129
130 static if (is(UT == float)) alias modfUT = modff;
131 else static if (is(UT == double)) alias modfUT = modf;
132 else static assert(0);
133
134 UT integerPart;
135
136 if (modfUT(num, &integerPart) == 0.0) return format("%d", num.to!long);
137 else return format("%.*g", floatPrecision, num);
138 }
139 }
140 }
141 }
142 assert(0);
143 }
144
145 unittest // formatNumber unit tests
146 {
147 import std.conv : to;
148 import std.format : format;
149
150 /* Integers */
151 assert(formatNumber(0) == "0");
152 assert(formatNumber(1) == "1");
153 assert(formatNumber(-1) == "-1");
154 assert(formatNumber(999) == "999");
155 assert(formatNumber(12345678912345) == "12345678912345");
156 assert(formatNumber(-12345678912345) == "-12345678912345");
157
158 size_t a1 = 10; assert(a1.formatNumber == "10");
159 const int a2 = -33234; assert(a2.formatNumber == "-33234");
160 immutable long a3 = -12345678912345; assert(a3.formatNumber == "-12345678912345");
161
162 // Specifying precision should never matter for integer values.
163 assert(formatNumber(0, 0) == "0");
164 assert(formatNumber(1, 0) == "1");
165 assert(formatNumber(-1, 0) == "-1");
166 assert(formatNumber(999, 0) == "999");
167 assert(formatNumber(12345678912345, 0) == "12345678912345");
168 assert(formatNumber(-12345678912345, 0) == "-12345678912345");
169
170 assert(formatNumber(0, 3) == "0");
171 assert(formatNumber(1, 3) == "1");
172 assert(formatNumber(-1, 3 ) == "-1");
173 assert(formatNumber(999, 3) == "999");
174 assert(formatNumber(12345678912345, 3) == "12345678912345");
175 assert(formatNumber(-12345678912345, 3) == "-12345678912345");
176
177 assert(formatNumber(0, 9) == "0");
178 assert(formatNumber(1, 9) == "1");
179 assert(formatNumber(-1, 9 ) == "-1");
180 assert(formatNumber(999, 9) == "999");
181 assert(formatNumber(12345678912345, 9) == "12345678912345");
182 assert(formatNumber(-12345678912345, 9) == "-12345678912345");
183
184 /* Doubles */
185 assert(formatNumber(0.0) == "0");
186 assert(formatNumber(0.2) == "0.2");
187 assert(formatNumber(0.123412, 0) == "0");
188 assert(formatNumber(0.123412, 1) == "0.1");
189 assert(formatNumber(0.123412, 2) == "0.12");
190 assert(formatNumber(0.123412, 5) == "0.12341");
191 assert(formatNumber(0.123412, 6) == "0.123412");
192 assert(formatNumber(0.123412, 7) == "0.123412");
193 assert(formatNumber(9.123412, 5) == "9.12341");
194 assert(formatNumber(9.123412, 6) == "9.123412");
195 assert(formatNumber(99.123412, 5) == "99.12341");
196 assert(formatNumber(99.123412, 6) == "99.123412");
197 assert(formatNumber(99.123412, 7) == "99.12341");
198 assert(formatNumber(999.123412, 0) == "999");
199 assert(formatNumber(999.123412, 1) == "999.1");
200 assert(formatNumber(999.123412, 2) == "999.12");
201 assert(formatNumber(999.123412, 3) == "999.123");
202 assert(formatNumber(999.123412, 4) == "999.1234");
203 assert(formatNumber(999.123412, 5) == "999.12341");
204 assert(formatNumber(999.123412, 6) == "999.123412");
205 assert(formatNumber(999.123412, 7) == "999.1234");
206 assert(formatNumber!(double, 9)(999.12341234, 7) == "999.1234123");
207 assert(formatNumber(9001.0) == "9001");
208 assert(formatNumber(1234567891234.0) == "1234567891234");
209 assert(formatNumber(1234567891234.0, 0) == "1234567891234");
210 assert(formatNumber(1234567891234.0, 1) == "1234567891234");
211
212 /* Test round off cases. Note: These tests will not pass on Windows 32-bit builds.
213 * Use the -m64 flag on Windows to get a 64-bit build.
214 *
215 * Note: The first test case does not generate correct results on Windows 64-bit
216 * builds prior to DMD 2.096. In DMD 2.096 the code was moved into Phobos rather
217 * than being forwared to the C library. Both formatNumber and the underlying
218 * format call are included. See: https://github.com/dlang/phobos/pull/7757.
219 */
220 version(Windows)
221 {
222 static if (__VERSION__ <= 2095)
223 {
224 // Incorrect
225 assert(format("%.*f", 0, 0.6) == "0");
226 assert(formatNumber(0.6, 0) == "0");
227 }
228 else
229 {
230 assert(format("%.*f", 0, 0.6) == "1");
231 assert(formatNumber(0.6, 0) == "1");
232 }
233 }
234 else
235 {
236 assert(format("%.*f", 0, 0.6) == "1");
237 assert(formatNumber(0.6, 0) == "1");
238 }
239
240 assert(formatNumber(0.6, 1) == "0.6");
241 assert(formatNumber(0.06, 0) == "0");
242 assert(formatNumber(0.06, 1) == "0.1");
243 assert(formatNumber(0.06, 2) == "0.06");
244 assert(formatNumber(0.06, 3) == "0.06");
245 assert(formatNumber(9.49999, 0) == "9");
246 assert(formatNumber(9.49999, 1) == "9.5");
247 assert(formatNumber(9.6, 0) == "10");
248 assert(formatNumber(9.6, 1) == "9.6");
249 assert(formatNumber(99.99, 0) == "100");
250 assert(formatNumber(99.99, 1) == "100");
251 assert(formatNumber(99.99, 2) == "99.99");
252 assert(formatNumber(9999.9996, 3) == "10000");
253 assert(formatNumber(9999.9996, 4) == "9999.9996");
254 assert(formatNumber(99999.99996, 4) == "100000");
255 assert(formatNumber(99999.99996, 5) == "99999.99996");
256 assert(formatNumber(999999.999996, 5) == "1000000");
257 assert(formatNumber(999999.999996, 6) == "999999.999996");
258
259 /* Turn off precision, the 'human readable' style.
260 * Note: Remains o if both are zero (first test). If it becomes desirable to support
261 * turning it off when for the precision equal zero case the simple extension is to
262 * allow the 'human readable' precision template parameter to be negative.
263 */
264 assert(formatNumber!(double, 0)(999.123412, 0) == "999");
265 assert(formatNumber!(double, 0)(999.123412, 1) == "1e+03");
266 assert(formatNumber!(double, 0)(999.123412, 2) == "1e+03");
267 assert(formatNumber!(double, 0)(999.123412, 3) == "999");
268 assert(formatNumber!(double, 0)(999.123412, 4) == "999.1");
269
270 // Default number printing
271 assert(formatNumber(1.2) == "1.2");
272 assert(formatNumber(12.3) == "12.3");
273 assert(formatNumber(12.34) == "12.34");
274 assert(formatNumber(123.45) == "123.45");
275 assert(formatNumber(123.456) == "123.456");
276 assert(formatNumber(1234.567) == "1234.567");
277 assert(formatNumber(1234.5678) == "1234.5678");
278 assert(formatNumber(12345.6789) == "12345.6789");
279 assert(formatNumber(12345.67891) == "12345.67891");
280 assert(formatNumber(123456.78912) == "123456.78912");
281 assert(formatNumber(123456.789123) == "123456.789123");
282 assert(formatNumber(1234567.891234) == "1234567.89123");
283 assert(formatNumber(12345678.912345) == "12345678.9123");
284 assert(formatNumber(123456789.12345) == "123456789.123");
285 assert(formatNumber(1234567891.2345) == "1234567891.23");
286 assert(formatNumber(12345678912.345) == "12345678912.3");
287 assert(formatNumber(123456789123.45) == "123456789123");
288 assert(formatNumber(1234567891234.5) == "1.23456789123e+12");
289 assert(formatNumber(12345678912345.6) == "1.23456789123e+13");
290 assert(formatNumber(123456789123456.0) == "123456789123456");
291 assert(formatNumber(0.3) == "0.3");
292 assert(formatNumber(0.03) == "0.03");
293 assert(formatNumber(0.003) == "0.003");
294 assert(formatNumber(0.0003) == "0.0003");
295 assert(formatNumber(0.00003) == "3e-05" || formatNumber(0.00003) == "3e-5");
296 assert(formatNumber(0.000003) == "3e-06" || formatNumber(0.000003) == "3e-6");
297 assert(formatNumber(0.0000003) == "3e-07" || formatNumber(0.0000003) == "3e-7");
298
299 // Large number inside and outside the contiguous integer representation range
300 double dlarge = 2.0^^(double.mant_dig - 2) - 10.0;
301 double dhuge = 2.0^^(double.mant_dig + 1) + 1000.0;
302
303 assert(dlarge.formatNumber == format("%d", dlarge.to!long));
304 assert(dhuge.formatNumber!(double) == format("%.12g", dhuge));
305
306 // Negative values - Repeat most of above tests.
307 assert(formatNumber(-0.0) == "-0" || formatNumber(-0.0) == "0");
308 assert(formatNumber(-0.2) == "-0.2");
309 assert(formatNumber(-0.123412, 0) == "-0");
310 assert(formatNumber(-0.123412, 1) == "-0.1");
311 assert(formatNumber(-0.123412, 2) == "-0.12");
312 assert(formatNumber(-0.123412, 5) == "-0.12341");
313 assert(formatNumber(-0.123412, 6) == "-0.123412");
314 assert(formatNumber(-0.123412, 7) == "-0.123412");
315 assert(formatNumber(-9.123412, 5) == "-9.12341");
316 assert(formatNumber(-9.123412, 6) == "-9.123412");
317 assert(formatNumber(-99.123412, 5) == "-99.12341");
318 assert(formatNumber(-99.123412, 6) == "-99.123412");
319 assert(formatNumber(-99.123412, 7) == "-99.12341");
320 assert(formatNumber(-999.123412, 0) == "-999");
321 assert(formatNumber(-999.123412, 1) == "-999.1");
322 assert(formatNumber(-999.123412, 2) == "-999.12");
323 assert(formatNumber(-999.123412, 3) == "-999.123");
324 assert(formatNumber(-999.123412, 4) == "-999.1234");
325 assert(formatNumber(-999.123412, 5) == "-999.12341");
326 assert(formatNumber(-999.123412, 6) == "-999.123412");
327 assert(formatNumber(-999.123412, 7) == "-999.1234");
328 assert(formatNumber!(double, 9)(-999.12341234, 7) == "-999.1234123");
329 assert(formatNumber(-9001.0) == "-9001");
330 assert(formatNumber(-1234567891234.0) == "-1234567891234");
331 assert(formatNumber(-1234567891234.0, 0) == "-1234567891234");
332 assert(formatNumber(-1234567891234.0, 1) == "-1234567891234");
333
334 /* Test round off cases with negative numbers. Note: These tests will not pass
335 * on Windows 32-bit builds. Use the -m64 flag on Windows to get a 64-bit build.
336 *
337 * Note: The first test case does not generate correct results on Windows 64-bit
338 * builds prior to DMD 2.096. In DMD 2.096 the code was moved into Phobos rather
339 * than being forwared to the C library. Both formatNumber and the underlying
340 * format call are included. See: https://github.com/dlang/phobos/pull/7757.
341 */
342 version(Windows)
343 {
344 static if (__VERSION__ <= 2095)
345 {
346 // Incorrect
347 assert(format("%.*f", 0, -0.6) == "-0");
348 assert(formatNumber(-0.6, 0) == "-0");
349 }
350 }
351 else
352 {
353 assert(format("%.*f", 0, -0.6) == "-1");
354 assert(formatNumber(-0.6, 0) == "-1");
355 }
356
357 assert(formatNumber(-0.6, 1) == "-0.6");
358 assert(formatNumber(-0.06, 0) == "-0");
359 assert(formatNumber(-0.06, 1) == "-0.1");
360 assert(formatNumber(-0.06, 2) == "-0.06");
361 assert(formatNumber(-0.06, 3) == "-0.06");
362 assert(formatNumber(-9.49999, 0) == "-9");
363 assert(formatNumber(-9.49999, 1) == "-9.5");
364 assert(formatNumber(-9.6, 0) == "-10");
365 assert(formatNumber(-9.6, 1) == "-9.6");
366 assert(formatNumber(-99.99, 0) == "-100");
367 assert(formatNumber(-99.99, 1) == "-100");
368 assert(formatNumber(-99.99, 2) == "-99.99");
369 assert(formatNumber(-9999.9996, 3) == "-10000");
370 assert(formatNumber(-9999.9996, 4) == "-9999.9996");
371 assert(formatNumber(-99999.99996, 4) == "-100000");
372 assert(formatNumber(-99999.99996, 5) == "-99999.99996");
373 assert(formatNumber(-999999.999996, 5) == "-1000000");
374 assert(formatNumber(-999999.999996, 6) == "-999999.999996");
375
376 assert(formatNumber!(double, 0)(-999.123412, 0) == "-999");
377 assert(formatNumber!(double, 0)(-999.123412, 1) == "-1e+03");
378 assert(formatNumber!(double, 0)(-999.123412, 2) == "-1e+03");
379 assert(formatNumber!(double, 0)(-999.123412, 3) == "-999");
380 assert(formatNumber!(double, 0)(-999.123412, 4) == "-999.1");
381
382 // Default number printing
383 assert(formatNumber(-1.2) == "-1.2");
384 assert(formatNumber(-12.3) == "-12.3");
385 assert(formatNumber(-12.34) == "-12.34");
386 assert(formatNumber(-123.45) == "-123.45");
387 assert(formatNumber(-123.456) == "-123.456");
388 assert(formatNumber(-1234.567) == "-1234.567");
389 assert(formatNumber(-1234.5678) == "-1234.5678");
390 assert(formatNumber(-12345.6789) == "-12345.6789");
391 assert(formatNumber(-12345.67891) == "-12345.67891");
392 assert(formatNumber(-123456.78912) == "-123456.78912");
393 assert(formatNumber(-123456.789123) == "-123456.789123");
394 assert(formatNumber(-1234567.891234) == "-1234567.89123");
395
396 assert(formatNumber(-12345678.912345) == "-12345678.9123");
397 assert(formatNumber(-123456789.12345) == "-123456789.123");
398 assert(formatNumber(-1234567891.2345) == "-1234567891.23");
399 assert(formatNumber(-12345678912.345) == "-12345678912.3");
400 assert(formatNumber(-123456789123.45) == "-123456789123");
401 assert(formatNumber(-1234567891234.5) == "-1.23456789123e+12");
402 assert(formatNumber(-12345678912345.6) == "-1.23456789123e+13");
403 assert(formatNumber(-123456789123456.0) == "-123456789123456");
404
405 assert(formatNumber(-0.3) == "-0.3");
406 assert(formatNumber(-0.03) == "-0.03");
407 assert(formatNumber(-0.003) == "-0.003");
408 assert(formatNumber(-0.0003) == "-0.0003");
409 assert(formatNumber(-0.00003) == "-3e-05" || formatNumber(-0.00003) == "-3e-5");
410 assert(formatNumber(-0.000003) == "-3e-06" || formatNumber(-0.000003) == "-3e-6");
411 assert(formatNumber(-0.0000003) == "-3e-07" || formatNumber(-0.0000003) == "-3e-7");
412
413 const double dlargeNeg = -2.0^^(double.mant_dig - 2) + 10.0;
414 immutable double dhugeNeg = -2.0^^(double.mant_dig + 1) - 1000.0;
415
416 assert(dlargeNeg.formatNumber == format("%d", dlargeNeg.to!long));
417 assert(dhugeNeg.formatNumber!(double) == format("%.12g", dhugeNeg));
418
419 // Type qualifiers
420 const double b1 = 0.0; assert(formatNumber(b1) == "0");
421 const double b2 = 0.2; assert(formatNumber(b2) == "0.2");
422 const double b3 = 0.123412; assert(formatNumber(b3, 2) == "0.12");
423 immutable double b4 = 99.123412; assert(formatNumber(b4, 5) == "99.12341");
424 immutable double b5 = 99.123412; assert(formatNumber(b5, 7) == "99.12341");
425
426 // Special values
427 assert(formatNumber(double.nan) == "nan");
428 assert(formatNumber(double.nan, 0) == "nan");
429 assert(formatNumber(double.nan, 1) == "nan");
430 assert(formatNumber(double.nan, 9) == "nan");
431 assert(formatNumber(double.infinity) == "inf");
432 assert(formatNumber(double.infinity, 0) == "inf");
433 assert(formatNumber(double.infinity, 1) == "inf");
434 assert(formatNumber(double.infinity, 9) == "inf");
435
436 // Float. Mix negative and type qualifiers in.
437 assert(formatNumber(0.0f) == "0");
438 assert(formatNumber(0.5f) == "0.5");
439 assert(formatNumber(0.123412f, 0) == "0");
440 assert(formatNumber(0.123412f, 1) == "0.1");
441 assert(formatNumber(-0.123412f, 2) == "-0.12");
442 assert(formatNumber(9.123412f, 5) == "9.12341");
443 assert(formatNumber(9.123412f, 6) == "9.123412");
444 assert(formatNumber(-99.123412f, 5) == "-99.12341");
445 assert(formatNumber(99.123412f, 7) == "99.12341");
446 assert(formatNumber(-999.123412f, 5) == "-999.12341");
447
448 float c1 = 999.123412f; assert(formatNumber(c1, 7) == "999.1234");
449 float c2 = 999.1234f; assert(formatNumber!(float, 9)(c2, 3) == "999.123");
450 const float c3 = 9001.0f; assert(formatNumber(c3) == "9001");
451 const float c4 = -12345678.0f; assert(formatNumber(c4) == "-12345678");
452 immutable float c5 = 12345678.0f; assert(formatNumber(c5, 0) == "12345678");
453 immutable float c6 = 12345678.0f; assert(formatNumber(c6, 1) == "12345678");
454
455 float flarge = 2.0^^(float.mant_dig - 2) - 10.0;
456 float fhuge = 2.0^^(float.mant_dig + 1) + 1000.0;
457
458 assert(flarge.formatNumber == format("%d", flarge.to!long));
459 assert(fhuge.formatNumber!(float, 12) == format("%.12g", fhuge));
460
461 // Reals - No special formatting
462 real d1 = 2.0^^(double.mant_dig) - 1000.0; assert(formatNumber(d1) == format("%.12g", d1));
463 real d2 = 123456789.12341234L; assert(formatNumber(d2, 12) == format("%.12g", d2));
464 }
465
466 /**
467 rangeMedian. Finds the median. Modifies the range via topN or sort in the process.
468
469 Note: topN is the preferred algorithm, but the version prior to Phobos 2.073
470 is pathologically slow on certain data sets. Use topN in 2.073 and later,
471 sort in earlier versions.
472
473 See: https://issues.dlang.org/show_bug.cgi?id=16517
474 https://github.com/dlang/phobos/pull/4815
475 http://forum.dlang.org/post/ujuugklmbibuheptdwcn@forum.dlang.org
476 */
477 static if (__VERSION__ >= 2073)
478 {
479 version = rangeMedianViaTopN;
480 }
481 else
482 {
483 version = rangeMedianViaSort;
484 }
485
486 auto rangeMedian (Range) (Range r)
487 if (isRandomAccessRange!Range && hasLength!Range && hasSlicing!Range)
488 {
489 version(rangeMedianViaSort)
490 {
491 version(rangeMedianViaTopN)
492 {
493 assert(0, "Both rangeMedianViaSort and rangeMedianViaTopN assigned as versions. Assign only one.");
494 }
495 }
496 else version(rangeMedianViaTopN)
497 {
498 }
499 else
500 {
501 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned.");
502 }
503
504 import std.traits : isFloatingPoint;
505
506 ElementType!Range median;
507
508 if (r.length > 0)
509 {
510 size_t medianIndex = r.length / 2;
511
512 version(rangeMedianViaSort)
513 {
514 import std.algorithm : sort;
515 sort(r);
516 median = r[medianIndex];
517
518 static if (isFloatingPoint!(ElementType!Range))
519 {
520 if (r.length % 2 == 0)
521 {
522 /* Even number of values. Split the difference. */
523 median = (median + r[medianIndex - 1]) / 2.0;
524 }
525 }
526 }
527 else version(rangeMedianViaTopN)
528 {
529 import std.algorithm : maxElement, topN;
530 topN(r, medianIndex);
531 median = r[medianIndex];
532
533 static if (isFloatingPoint!(ElementType!Range))
534 {
535 if (r.length % 2 == 0)
536 {
537 /* Even number of values. Split the difference. */
538 if (r[medianIndex - 1] < median)
539 {
540 median = (median + r[0..medianIndex].maxElement) / 2.0;
541 }
542 }
543 }
544 }
545 else
546 {
547 static assert(0, "A version of rangeMedianViaSort or rangeMedianViaTopN must be assigned.");
548 }
549 }
550
551 return median;
552 }
553
554 /* rangeMedian unit tests. */
555 @safe unittest
556 {
557 import std.math : isNaN;
558 import std.algorithm : all, permutations;
559
560 // Median of empty range is (type).init. Zero for int, nan for floats/doubles
561 assert(rangeMedian(new int[0]) == int.init);
562 assert(rangeMedian(new double[0]).isNaN && double.init.isNaN);
563 assert(rangeMedian(new string[0]) == "");
564
565 assert(rangeMedian([3]) == 3);
566 assert(rangeMedian([3.0]) == 3.0);
567 assert(rangeMedian([3.5]) == 3.5);
568 assert(rangeMedian(["aaa"]) == "aaa");
569
570 /* Even number of elements: Split the difference for floating point, but not other types. */
571 assert(rangeMedian([3, 4]) == 4);
572 assert(rangeMedian([3.0, 4.0]) == 3.5);
573
574 assert(rangeMedian([3, 6, 12]) == 6);
575 assert(rangeMedian([3.0, 6.5, 12.5]) == 6.5);
576
577 // Do the rest with permutations
578 assert([4, 7].permutations.all!(x => (x.rangeMedian == 7)));
579 assert([4.0, 7.0].permutations.all!(x => (x.rangeMedian == 5.5)));
580 assert(["aaa", "bbb"].permutations.all!(x => (x.rangeMedian == "bbb")));
581
582 assert([4, 7, 19].permutations.all!(x => (x.rangeMedian == 7)));
583 assert([4.5, 7.5, 19.5].permutations.all!(x => (x.rangeMedian == 7.5)));
584 assert(["aaa", "bbb", "ccc"].permutations.all!(x => (x.rangeMedian == "bbb")));
585
586 assert([4.5, 7.5, 19.5, 21.0].permutations.all!(x => (x.rangeMedian == 13.5)));
587 assert([4.5, 7.5, 19.5, 20.5, 36.0].permutations.all!(x => (x.rangeMedian == 19.5)));
588 assert([4.5, 7.5, 19.5, 24.0, 24.5, 25.0].permutations.all!(x => (x.rangeMedian == 21.75)));
589 assert([1.5, 3.25, 3.55, 4.5, 24.5, 25.0, 25.6].permutations.all!(x => (x.rangeMedian == 4.5)));
590 }
591
592 /// Quantiles
593
594 /** The different quantile interpolation methods.
595 * See: https://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html
596 */
597 enum QuantileInterpolation
598 {
599 R1 = 1, /// R quantile type 1
600 R2 = 2, /// R quantile type 2
601 R3 = 3, /// R quantile type 3
602 R4 = 4, /// R quantile type 4
603 R5 = 5, /// R quantile type 5
604 R6 = 6, /// R quantile type 6
605 R7 = 7, /// R quantile type 7
606 R8 = 8, /// R quantile type 8
607 R9 = 9, /// R quantile type 9
608 }
609
610
611 import std.traits : isFloatingPoint, isNumeric, Unqual;
612 import std.range;
613
614 /**
615 Returns the quantile in a data vector for a cumulative probability.
616
617 Takes a data vector and a probability and returns the quantile cut point for the
618 probability. The vector must be sorted and the probability in the range [0.0, 1.0].
619 The interpolation methods available are the same as in R and available in a number
620 of statistical packages. See the R documentation or wikipedia for details
621 (https://en.wikipedia.org/wiki/Quantile).
622
623 Examples:
624 ----
625 double data = [22, 57, 73, 97, 113];
626 double median = quantile(0.5, data); // 73
627 auto q1 = [0.25, 0.5, 0.75].map!(p => p.quantile(data)); // 57, 73, 97
628 auto q2 = [0.25, 0.5, 0.75].map!(p => p.quantile(data), QuantileInterpolation.R8); //45.3333, 73, 102.333
629 ----
630 */
631 double quantile(ProbType, Range)
632 (const ProbType prob, Range data, QuantileInterpolation method = QuantileInterpolation.R7)
633 if (isRandomAccessRange!Range && hasLength!Range && isNumeric!(ElementType!Range) &&
634 isFloatingPoint!ProbType)
635 in
636 {
637 import std.algorithm : isSorted;
638 assert(0.0 <= prob && prob <= 1.0);
639 assert(method >= QuantileInterpolation.min && method <= QuantileInterpolation.max);
640 assert(data.isSorted);
641 }
642 do
643 {
644 import core.stdc.math : modf;
645 import std.algorithm : max, min;
646 import std.conv : to;
647 import std.math : ceil, lrint;
648
649 /* Note: In the implementation below, 'h1' is the 1-based index into the data vector.
650 * This follows the wikipedia notation for the interpolation methods. One will be
651 * subtracted before the vector is accessed.
652 */
653
654 double q = double.nan; // The return value.
655
656 if (data.length == 1) q = data[0].to!double;
657 else if (data.length > 1)
658 {
659 if (method == QuantileInterpolation.R1)
660 {
661 q = data[((data.length * prob).ceil - 1.0).to!long.max(0).to!size_t].to!double;
662 }
663 else if (method == QuantileInterpolation.R2)
664 {
665 immutable double h1 = data.length * prob + 0.5;
666 immutable size_t lo = ((h1 - 0.5).ceil.to!long - 1).max(0).to!size_t;
667 immutable size_t hi = ((h1 + 0.5).to!size_t - 1).min(data.length - 1);
668 q = (data[lo].to!double + data[hi].to!double) / 2.0;
669 }
670 else if (method == QuantileInterpolation.R3)
671 {
672 /* Implementation notes:
673 * - R3 uses 'banker's rounding', where 0.5 is rounded to the nearest even
674 * value. The 'lrint' routine does this.
675 * - DMD will sometimes choose the incorrect 0.5 rounding if the calculation
676 * is done as a single step. The separate calculation of 'h1' avoids this.
677 */
678 immutable double h1 = data.length * prob;
679 q = data[h1.lrint.max(1).to!size_t - 1].to!double;
680 }
681 else if ((method == QuantileInterpolation.R4) ||
682 (method == QuantileInterpolation.R5) ||
683 (method == QuantileInterpolation.R6) ||
684 (method == QuantileInterpolation.R7) ||
685 (method == QuantileInterpolation.R8) ||
686 (method == QuantileInterpolation.R9))
687 {
688 /* Methods 4-9 have different formulas for generating the real-valued index,
689 * but work the same after that, choosing the final value by linear interpolation.
690 */
691 double h1;
692 switch (method)
693 {
694 case QuantileInterpolation.R4: h1 = data.length * prob; break;
695 case QuantileInterpolation.R5: h1 = data.length * prob + 0.5; break;
696 case QuantileInterpolation.R6: h1 = (data.length + 1) * prob; break;
697 case QuantileInterpolation.R7: h1 = (data.length - 1) * prob + 1.0; break;
698 case QuantileInterpolation.R8: h1 = (data.length.to!double + 1.0/3.0) * prob + 1.0/3.0; break;
699 case QuantileInterpolation.R9: h1 = (data.length + 0.25) * prob + 3.0/8.0; break;
700 default: assert(0);
701 }
702
703 double h1IntegerPart;
704 immutable double h1FractionPart = modf(h1, &h1IntegerPart);
705 immutable size_t lo = (h1IntegerPart - 1.0).to!long.max(0).min(data.length - 1).to!size_t;
706 q = data[lo];
707 if (h1FractionPart > 0.0)
708 {
709 immutable size_t hi = h1IntegerPart.to!long.min(data.length - 1).to!size_t;
710 q += h1FractionPart * (data[hi].to!double - data[lo].to!double);
711 }
712 }
713 else assert(0);
714 }
715 return q;
716 }
717
718 unittest
719 {
720 import std.algorithm : equal, map;
721 import std.array : array;
722 import std.traits : EnumMembers;
723
724 /* A couple simple tests. */
725 assert(quantile(0.5, [22, 57, 73, 97, 113]) == 73);
726 assert(quantile(0.5, [22.5, 57.5, 73.5, 97.5, 113.5]) == 73.5);
727 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113])).array == [57.0, 73.0, 97.0]);
728 assert([0.25, 0.5, 0.75].map!(p => p.quantile([22, 57, 73, 97, 113], QuantileInterpolation.R1)).array == [57.0, 73.0, 97.0]);
729
730 /* Data arrays. */
731 double[] d1 = [];
732 double[] d2 = [5.5];
733 double[] d3 = [0.0, 1.0];
734 double[] d4 = [-1.0, 1.0];
735 double[] d5 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
736 double[] d6 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0];
737 double[] d7 = [ 31.79, 64.19, 81.77];
738 double[] d8 = [-94.43, -74.55, -50.81, 27.45, 78.79];
739 double[] d9 = [-89.17, 20.93, 38.51, 48.03, 76.43, 77.02];
740 double[] d10 = [-99.53, -76.87, -76.69, -67.81, -40.26, -11.29, 21.02];
741 double[] d11 = [-78.32, -52.22, -50.86, 13.45, 15.96, 17.25, 46.35, 85.00];
742 double[] d12 = [-81.36, -70.87, -53.56, -42.14, -9.18, 7.23, 49.52, 80.43, 98.50];
743 double[] d13 = [ 38.37, 44.36, 45.70, 50.69, 51.36, 55.66, 56.91, 58.95, 62.01, 65.25];
744
745 /* Spot check a few other data types. Same expected outputs.*/
746 int[] d3Int = [0, 1];
747 int[] d4Int = [-1, 1];
748 int[] d5Int = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
749 size_t[] d6Size_t = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
750 float[] d7Float = [ 31.79f, 64.19f, 81.77f];
751 float[] d8Float = [-94.43f, -74.55f, -50.81f, 27.45f, 78.79f];
752 float[] d9Float = [-89.17f, 20.93f, 38.51f, 48.03f, 76.43f, 77.02f];
753 float[] d10Float = [-99.53f, -76.87f, -76.69f, -67.81f, -40.26f, -11.29f, 21.02f];
754
755 /* Probability values. */
756 double[] probs = [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0];
757
758 /* Expected values for each data array, for 'probs'. One expected result for each of the nine methods.
759 * The expected values were generated by R and Octave.
760 */
761 double[13][9] d1_expected; // All values double.nan, the default.
762 double[13][9] d2_expected = [
763 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
764 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
765 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
766 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
767 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
768 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
769 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
770 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
771 [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5],
772 ];
773 double[13][9] d3_expected = [
774 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
775 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
776 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
777 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0],
778 [0.0, 0.0, 0.0, 0.0, 0.3, 0.48, 0.5, 0.52, 1.0, 1.0, 1.0, 1.0, 1.0],
779 [0.0, 0.0, 0.0, 0.0, 0.2, 0.47, 0.5, 0.53, 1.0, 1.0, 1.0, 1.0, 1.0],
780 [0.0, 0.05, 0.1, 0.25, 0.4, 0.49, 0.5, 0.51, 0.75, 0.9, 0.95, 0.98, 1.0],
781 [0.0, 0.0, 0.0, 0.0, 0.2666667, 0.4766667, 0.5, 0.5233333, 1.0, 1.0, 1.0, 1.0, 1.0],
782 [0.0, 0.0, 0.0, 0.0, 0.275, 0.4775, 0.5, 0.5225, 1.0, 1.0, 1.0, 1.0, 1.0],
783 ];
784 double[13][9] d4_expected = [
785 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
786 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
787 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
788 [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -0.96, 0.0, 0.6, 0.8, 0.92, 1.0],
789 [-1.0, -1.0, -1.0, -1.0, -0.4, -0.04, 0.0, 0.04, 1.0, 1.0, 1.0, 1.0, 1.0],
790 [-1.0, -1.0, -1.0, -1.0, -0.6, -0.06, 0.0, 0.06, 1.0, 1.0, 1.0, 1.0, 1.0],
791 [-1.0, -0.9, -0.8, -0.5, -0.2, -0.02, 0.0, 0.02, 0.5, 0.8, 0.9, 0.96, 1.0],
792 [-1.0, -1.0, -1.0, -1.0, -0.4666667, -0.04666667, -4.440892e-16, 0.04666667, 1.0, 1.0, 1.0, 1.0, 1.0],
793 [-1.0, -1.0, -1.0, -1.0, -0.45, -0.045, 0.0, 0.045, 1.0, 1.0, 1.0, 1.0, 1.0],
794 ];
795 double[13][9] d5_expected = [
796 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0],
797 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 9.0, 10.0, 10.0, 10.0],
798 [0.0, 0.0, 0.0, 2.0, 3.0, 4.0, 5.0, 5.0, 7.0, 9.0, 9.0, 10.0, 10.0],
799 [0.0, 0.0, 0.1, 1.75, 3.4, 4.39, 4.5, 4.61, 7.25, 8.9, 9.45, 9.78, 10.0],
800 [0.0, 0.05, 0.6, 2.25, 3.9, 4.89, 5.0, 5.11, 7.75, 9.4, 9.95, 10.0, 10.0],
801 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.0, 10.0, 10.0],
802 [0.0, 0.5, 1.0, 2.5, 4.0, 4.9, 5.0, 5.1, 7.5, 9.0, 9.5, 9.8, 10.0],
803 [0.0, 0.0, 0.4666667, 2.166667, 3.866667, 4.886667, 5.0, 5.113333, 7.833333, 9.533333, 10.0, 10.0, 10.0],
804 [0.0, 0.0, 0.5, 2.1875, 3.875, 4.8875, 5.0, 5.1125, 7.8125, 9.5, 10.0, 10.0, 10.0],
805 ];
806 double[13][9] d6_expected = [
807 [0.0, 0.0, 1.0, 2.0, 4.0, 5.0, 5.0, 6.0, 8.0, 10.0, 11.0, 11.0, 11.0],
808 [0.0, 0.0, 1.0, 2.5, 4.0, 5.0, 5.5, 6.0, 8.5, 10.0, 11.0, 11.0, 11.0],
809 [0.0, 0.0, 0.0, 2.0, 4.0, 5.0, 5.0, 5.0, 8.0, 10.0, 10.0, 11.0, 11.0],
810 [0.0, 0.0, 0.2, 2.0, 3.8, 4.88, 5.0, 5.12, 8.0, 9.8, 10.4, 10.76, 11.0],
811 [0.0, 0.1, 0.7, 2.5, 4.3, 5.38, 5.5, 5.62, 8.5, 10.3, 10.9, 11.0, 11.0],
812 [0.0, 0.0, 0.3, 2.25, 4.2, 5.37, 5.5, 5.63, 8.75, 10.7, 11.0, 11.0, 11.0],
813 [0.0, 0.55, 1.1, 2.75, 4.4, 5.39, 5.5, 5.61, 8.25, 9.9, 10.45, 10.78, 11.0],
814 [0.0, 0.0, 0.5666667, 2.416667, 4.266667, 5.376667, 5.5, 5.623333, 8.583333, 10.43333, 11.0, 11.0, 11.0],
815 [0.0, 0.0, 0.6, 2.4375, 4.275, 5.3775, 5.5, 5.6225, 8.5625, 10.4, 11.0, 11.0, 11.0],
816 ];
817 double[13][9] d7_expected = [
818 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77],
819 [31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77, 81.77],
820 [31.79, 31.79, 31.79, 31.79, 31.79, 31.79, 64.19, 64.19, 64.19, 81.77, 81.77, 81.77, 81.77],
821 [31.79, 31.79, 31.79, 31.79, 38.27, 47.018, 47.99, 48.962, 68.585, 76.496, 79.133, 80.7152, 81.77],
822 [31.79, 31.79, 31.79, 39.89, 54.47, 63.218, 64.19, 64.7174, 77.375, 81.77, 81.77, 81.77, 81.77],
823 [31.79, 31.79, 31.79, 31.79, 51.23, 62.894, 64.19, 64.8932, 81.77, 81.77, 81.77, 81.77, 81.77],
824 [31.79, 35.03, 38.27, 47.99, 57.71, 63.542, 64.19, 64.5416, 72.98, 78.254, 80.012, 81.0668, 81.77],
825 [31.79, 31.79, 31.79, 37.19, 53.39, 63.11, 64.19, 64.776, 78.84, 81.77, 81.77, 81.77, 81.77],
826 [31.79, 31.79, 31.79, 37.865, 53.66, 63.137, 64.19, 64.76135, 78.47375, 81.77, 81.77, 81.77, 81.77],
827 ];
828 double[13][9] d8_expected = [
829 [-94.43, -94.43, -94.43, -74.55, -74.55, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79],
830 [-94.43, -94.43, -94.43, -74.55, -62.68, -50.81, -50.81, -50.81, 27.45, 78.79, 78.79, 78.79, 78.79],
831 [-94.43, -94.43, -94.43, -94.43, -74.55, -74.55, -74.55, -50.81, 27.45, 27.45, 78.79, 78.79, 78.79],
832 [-94.43, -94.43, -94.43, -89.46, -74.55, -63.867, -62.68, -61.493, 7.885, 53.12, 65.955, 73.656, 78.79],
833 [-94.43, -94.43, -94.43, -79.52, -62.68, -51.997, -50.81, -46.897, 40.285, 78.79, 78.79, 78.79, 78.79],
834 [-94.43, -94.43, -94.43, -84.49, -65.054, -52.2344, -50.81, -46.1144, 53.12, 78.79, 78.79, 78.79, 78.79],
835 [-94.43, -90.454, -86.478, -74.55, -60.306, -51.7596, -50.81, -47.6796, 27.45, 58.254, 68.522, 74.6828, 78.79],
836 [-94.43, -94.43, -94.43, -81.17667, -63.47133, -52.07613, -50.81, -46.63613, 44.56333, 78.79, 78.79, 78.79, 78.79],
837 [-94.43, -94.43, -94.43, -80.7625, -63.2735, -52.05635, -50.81, -46.70135, 43.49375, 78.79, 78.79, 78.79, 78.79],
838 ];
839 double[13][9] d9_expected = [
840 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02],
841 [-89.17, -89.17, -89.17, 20.93, 38.51, 38.51, 43.27, 48.03, 76.43, 77.02, 77.02, 77.02, 77.02],
842 [-89.17, -89.17, -89.17, 20.93, 20.93, 38.51, 38.51, 38.51, 48.03, 76.43, 77.02, 77.02, 77.02],
843 [-89.17, -89.17, -89.17, -34.12, 27.962, 37.4552, 38.51, 39.0812, 62.23, 76.666, 76.843, 76.9492, 77.02],
844 [-89.17, -89.17, -78.16, 20.93, 36.752, 42.6988, 43.27, 43.8412, 76.43, 76.961, 77.02, 77.02, 77.02],
845 [-89.17, -89.17, -89.17, -6.595, 34.994, 42.6036, 43.27, 43.9364, 76.5775, 77.02, 77.02, 77.02, 77.02],
846 [-89.17, -61.645, -34.12, 25.325, 38.51, 42.794, 43.27, 43.746, 69.33, 76.725, 76.8725, 76.961, 77.02],
847 [-89.17, -89.17, -89.17, 11.755, 36.166, 42.66707, 43.27, 43.87293, 76.47917, 77.02, 77.02, 77.02, 77.02],
848 [-89.17, -89.17, -89.17, 14.04875, 36.3125, 42.675, 43.27, 43.865, 76.46688, 77.02, 77.02, 77.02, 77.02],
849 ];
850 double[13][9] d10_expected = [
851 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02],
852 [-99.53, -99.53, -99.53, -76.87, -76.69, -67.81, -67.81, -67.81, -11.29, 21.02, 21.02, 21.02, 21.02],
853 [-99.53, -99.53, -99.53, -76.87, -76.69, -76.69, -67.81, -67.81, -40.26, -11.29, 21.02, 21.02, 21.02],
854 [-99.53, -99.53, -99.53, -82.535, -76.726, -72.8716, -72.25, -71.6284, -33.0175, -1.597, 9.7115, 16.4966, 21.02],
855 [-99.53, -99.53, -94.998, -76.825, -74.026, -68.4316, -67.81, -65.8815, -18.5325, 14.558, 21.02, 21.02, 21.02],
856 [-99.53, -99.53, -99.53, -76.87, -74.914, -68.5204, -67.81, -65.606, -11.29, 21.02, 21.02, 21.02, 21.02],
857 [-99.53, -92.732, -85.934, -76.78, -73.138, -68.3428, -67.81, -66.157, -25.775, 1.634, 11.327, 17.1428, 21.02],
858 [-99.53, -99.53, -98.01933, -76.84, -74.322, -68.4612, -67.81, -65.78967, -16.11833, 18.866, 21.02, 21.02, 21.02],
859 [-99.53, -99.53, -97.264, -76.83625, -74.248, -68.4538, -67.81, -65.81263, -16.72187, 17.789, 21.02, 21.02, 21.02],
860 ];
861 double[13][9] d11_expected = [
862 [-78.32, -78.32, -78.32, -52.22, 13.45, 13.45, 13.45, 15.96, 17.25, 85.0, 85.0, 85.0, 85.0],
863 [-78.32, -78.32, -78.32, -51.54, 13.45, 13.45, 14.705, 15.96, 31.8, 85.0, 85.0, 85.0, 85.0],
864 [-78.32, -78.32, -78.32, -52.22, -50.86, 13.45, 13.45, 13.45, 17.25, 46.35, 85.0, 85.0, 85.0],
865 [-78.32, -78.32, -78.32, -52.22, -37.998, 8.3052, 13.45, 13.6508, 17.25, 54.08, 69.54, 78.816, 85.0],
866 [-78.32, -78.32, -70.49, -51.54, -5.843, 14.5042, 14.705, 14.9058, 31.8, 73.405, 85.0, 85.0, 85.0],
867 [-78.32, -78.32, -78.32, -51.88, -12.274, 14.4791, 14.705, 14.9309, 39.075, 85.0, 85.0, 85.0, 85.0],
868 [-78.32, -69.185, -60.05, -51.2, 0.588, 14.5293, 14.705, 14.8807, 24.525, 57.945, 71.4725, 79.589, 85.0],
869 [-78.32, -78.32, -73.97, -51.65333, -7.986667, 14.49583, 14.705, 14.91417, 34.225, 78.55833, 85.0, 85.0, 85.0],
870 [-78.32, -78.32, -73.1, -51.625, -7.45075, 14.49792, 14.705, 14.91208, 33.61875, 77.27, 85.0, 85.0, 85.0],
871 ];
872 double[13][9] d12_expected = [
873 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5],
874 [-81.36, -81.36, -81.36, -53.56, -42.14, -9.18, -9.18, -9.18, 49.52, 98.5, 98.5, 98.5, 98.5],
875 [-81.36, -81.36, -81.36, -70.87, -42.14, -42.14, -42.14, -9.18, 49.52, 80.43, 98.5, 98.5, 98.5],
876 [-81.36, -81.36, -81.36, -66.5425, -46.708, -28.6264, -25.66, -22.6936, 38.9475, 82.237, 90.3685, 95.2474, 98.5],
877 [-81.36, -81.36, -77.164, -57.8875, -38.844, -12.1464, -9.18, -7.7031, 57.2475, 91.272, 98.5, 98.5, 98.5],
878 [-81.36, -81.36, -81.36, -62.215, -42.14, -12.476, -9.18, -7.539, 64.975, 98.5, 98.5, 98.5, 98.5],
879 [-81.36, -77.164, -72.968, -53.56, -35.548, -11.8168, -9.18, -7.8672, 49.52, 84.044, 91.272, 95.6088, 98.5],
880 [-81.36, -81.36, -78.56267, -59.33, -39.94267, -12.25627, -9.18, -7.6484, 59.82333, 93.68133, 98.5, 98.5, 98.5],
881 [-81.36, -81.36, -78.213, -58.96938, -39.668, -12.2288, -9.18, -7.662075, 59.17938, 93.079, 98.5, 98.5, 98.5],
882 ];
883 double[13][9] d13_expected = [
884 [38.37, 38.37, 38.37, 45.7, 50.69, 51.36, 51.36, 55.66, 58.95, 62.01, 65.25, 65.25, 65.25],
885 [38.37, 38.37, 41.365, 45.7, 51.025, 51.36, 53.51, 55.66, 58.95, 63.63, 65.25, 65.25, 65.25],
886 [38.37, 38.37, 38.37, 44.36, 50.69, 51.36, 51.36, 51.36, 58.95, 62.01, 65.25, 65.25, 65.25],
887 [38.37, 38.37, 38.37, 45.03, 50.69, 51.293, 51.36, 51.79, 57.93, 62.01, 63.63, 64.602, 65.25],
888 [38.37, 38.37, 41.365, 45.7, 51.025, 53.08, 53.51, 53.94, 58.95, 63.63, 65.25, 65.25, 65.25],
889 [38.37, 38.37, 38.969, 45.365, 50.958, 53.037, 53.51, 53.983, 59.715, 64.926, 65.25, 65.25, 65.25],
890 [38.37, 41.0655, 43.761, 46.9475, 51.092, 53.123, 53.51, 53.897, 58.44, 62.334, 63.792, 64.6668, 65.25],
891 [38.37, 38.37, 40.56633, 45.58833, 51.00267, 53.06567, 53.51, 53.95433, 59.205, 64.062, 65.25, 65.25, 65.25],
892 [38.37, 38.37, 40.766, 45.61625, 51.00825, 53.06925, 53.51, 53.95075, 59.14125, 63.954, 65.25, 65.25, 65.25],
893 ];
894
895 void compareResults(const double[] actual, const double[] expected, string dataset, QuantileInterpolation method)
896 {
897 import std.conv : to;
898 import std.format : format;
899 import std.math : isNaN;
900 import std.range : lockstep;
901
902 // Expected results are from printed representations (from R and Octave) and
903 // are not at double precision accuracy. Use a relaxed equality test at more
904 // expected float precision. Eventually need more precise expected values.
905 // Some tests make double-float-double round-trips. These need float precision.
906
907 double maxRelDiff = 4.0 * float.epsilon.to!double;
908 double maxAbsDiff = 2.0 * double.epsilon;
909
910 foreach (i, actualValue, expectedValue; lockstep(actual, expected))
911 {
912 assert(nearEqual!(double, Yes.areNaNsEqual)(actualValue, expectedValue, maxRelDiff, maxAbsDiff),
913 format("Quantile unit test failure, dataset %s, method: %s, index: %d, expected: %.32g, actual: %.32g",
914 dataset, method.to!string, i, expectedValue, actualValue));
915 }
916 }
917
918 foreach(methodIndex, method; EnumMembers!QuantileInterpolation)
919 {
920 compareResults(probs.map!(p => p.quantile(d1, method)).array, d1_expected[methodIndex], "d1", method);
921 compareResults(probs.map!(p => p.quantile(d2, method)).array, d2_expected[methodIndex], "d2", method);
922 compareResults(probs.map!(p => p.quantile(d3, method)).array, d3_expected[methodIndex], "d3", method);
923 compareResults(probs.map!(p => p.quantile(d3Int, method)).array, d3_expected[methodIndex], "d3Int", method);
924 compareResults(probs.map!(p => p.quantile(d4, method)).array, d4_expected[methodIndex], "d4", method);
925 compareResults(probs.map!(p => p.quantile(d4Int, method)).array, d4_expected[methodIndex], "d4Int", method);
926 compareResults(probs.map!(p => p.quantile(d5, method)).array, d5_expected[methodIndex], "d5", method);
927 compareResults(probs.map!(p => p.quantile(d5Int, method)).array, d5_expected[methodIndex], "d5Int", method);
928 compareResults(probs.map!(p => p.quantile(d6, method)).array, d6_expected[methodIndex], "d6", method);
929 compareResults(probs.map!(p => p.quantile(d6Size_t, method)).array, d6_expected[methodIndex], "d6Size_t", method);
930 compareResults(probs.map!(p => p.quantile(d7, method)).array, d7_expected[methodIndex], "d7", method);
931 compareResults(probs.map!(p => p.quantile(d7Float, method)).array, d7_expected[methodIndex], "d7Float", method);
932 compareResults(probs.map!(p => p.quantile(d8, method)).array, d8_expected[methodIndex], "d8", method);
933 compareResults(probs.map!(p => p.quantile(d8Float, method)).array, d8_expected[methodIndex], "d8Float", method);
934 compareResults(probs.map!(p => p.quantile(d9, method)).array, d9_expected[methodIndex], "d9", method);
935 compareResults(probs.map!(p => p.quantile(d9Float, method)).array, d9_expected[methodIndex], "d9Float", method);
936 compareResults(probs.map!(p => p.quantile(d10, method)).array, d10_expected[methodIndex], "d10", method);
937 compareResults(probs.map!(p => p.quantile(d10Float, method)).array, d10_expected[methodIndex], "d10Float", method);
938 compareResults(probs.map!(p => p.quantile(d11, method)).array, d11_expected[methodIndex], "d11", method);
939 compareResults(probs.map!(p => p.quantile(d12, method)).array, d12_expected[methodIndex], "d12", method);
940 compareResults(probs.map!(p => p.quantile(d13, method)).array, d13_expected[methodIndex], "d13", method);
941 }
942 }
943 /** Flag use by the nearEqual template. */
944 alias AreNaNsEqual = Flag!"areNaNsEqual";
945
946 /**
947 nearEqual checks if two floating point numbers are "near equal".
948
949 nearEqual is an alternative to the approxEqual and isClose functions in the Phobos
950 standard library. It should be regarded as experimental. Currently it is used only in
951 unit tests.
952
953 Default relative diff tolerance is small. Absolute diff tolerance is also small, but
954 non-zero. This means comparing number near zero to zero will considered near equal by
955 defaults.
956
957 Default tolerances will not suffice for float-to-double conversion. Use tolerances
958 based on float in these cases.
959 */
960 bool nearEqual(T, AreNaNsEqual naNsAreEqual = No.areNaNsEqual)
961 (T x, T y, T maxRelDiff = 4.0 * T.epsilon, T maxAbsDiff = T.epsilon)
962 if (isFloatingPoint!T)
963 {
964 import std.algorithm : max;
965 import std.math : abs, isNaN;
966
967 if (x == y) return true;
968
969 static if (naNsAreEqual) if (x.isNaN && y.isNaN) return true;
970
971 if (x.isNaN || y.isNaN) return false;
972
973 immutable absDiff = abs(x - y);
974
975 if (absDiff <= maxAbsDiff) return true;
976
977 immutable relDiff = absDiff / max(abs(x), abs(y));
978
979 return (relDiff <= maxRelDiff);
980 }
981
982 @safe unittest
983 {
984 enum float defaultMaxRelDiffFloat = 4 * float.epsilon;
985 enum double defaultMaxRelDiffDouble = 4 * double.epsilon;
986
987 // +0.0 and -0.0 are always equal, even with zero relative and absolute diffs.
988 assert(nearEqual(0.0f, 0.0f, 0.0f, 0.0f));
989 assert(nearEqual(-0.0f, +0.0f, 0.0f, 0.0f));
990 assert(nearEqual(0.0, 0.0, 0.0, 0.0));
991 assert(nearEqual(-0.0, +0.0, 0.0, 0.0));
992
993 // NaNs are equal or not depending on template parameters
994 assert(!nearEqual(float.nan, float.nan, 0.0, 0.0));
995 assert(!nearEqual(double.nan, double.nan, 0.0, 0.0));
996 assert(nearEqual!(float, Yes.areNaNsEqual)(float.nan, float.nan, 0.0, 0.0));
997 assert(nearEqual!(double, Yes.areNaNsEqual)(double.nan, double.nan, 0.0, 0.0));
998
999 // Infinity tests
1000 assert(nearEqual(float.infinity, float.infinity, 0.0f, 0.0f));
1001 assert(nearEqual(-float.infinity, -float.infinity, 0.0f, 0.0f));
1002 assert(!nearEqual(-float.infinity, float.infinity, 0.0f, 0.0f));
1003
1004 assert(nearEqual(double.infinity, double.infinity, 0.0f, 0.0f));
1005 assert(nearEqual(-double.infinity, -double.infinity, 0.0f, 0.0f));
1006 assert(!nearEqual(-double.infinity, double.infinity, 0.0f, 0.0f));
1007
1008 assert(!nearEqual(float.infinity, float.max, 0.0f, 0.0f));
1009 assert(!nearEqual(-float.infinity, -float.max, 0.0f, 0.0f));
1010 assert(!nearEqual(double.infinity, double.max, 0.0f, 0.0f));
1011 assert(!nearEqual(-double.infinity, -double.max, 0.0f, 0.0f));
1012
1013 // Near Zero tests
1014 assert(nearEqual(0.0f, float.min_normal));
1015 assert(nearEqual(0.0f, -float.min_normal));
1016 assert(nearEqual(0.0, double.min_normal));
1017 assert(nearEqual(0.0, -double.min_normal));
1018
1019 assert(!nearEqual(0.0f, float.min_normal, defaultMaxRelDiffFloat, 0.0f));
1020 assert(!nearEqual(0.0f, -float.min_normal, defaultMaxRelDiffFloat, 0.0f));
1021 assert(!nearEqual(0.0, double.min_normal, defaultMaxRelDiffDouble, 0.0));
1022 assert(!nearEqual(0.0, -double.min_normal, defaultMaxRelDiffDouble, 0.0));
1023
1024 // Tests with some different sizes
1025 assert(!nearEqual(1.11f, 1.110001f));
1026 assert( nearEqual(1.11f, 1.1100001f));
1027
1028 assert(!nearEqual(1.11, 1.11000000000001));
1029 assert( nearEqual(1.11, 1.110000000000001));
1030
1031 assert(!nearEqual(-1.11f, -1.110001f));
1032 assert( nearEqual(-1.11f, -1.1100001f));
1033
1034 assert(!nearEqual(-1.11, -1.11000000000001));
1035 assert( nearEqual(-1.11, -1.110000000000001));
1036
1037 assert(!nearEqual(3739.7f, 3739.69f));
1038 assert( nearEqual(3739.7f, 3739.699f));
1039
1040 assert(!nearEqual(3739.7, 3739.69999999999));
1041 assert( nearEqual(3739.7, 3739.699999999999));
1042
1043 assert(!nearEqual(732156983.0f, 732157367.0f)); // Delta 384
1044 assert( nearEqual(732156983.0f, 732157303.0f)); // Delta 320
1045
1046 assert(!nearEqual(732156983.0, 732156983.000001));
1047 assert( nearEqual(732156983.0, 732156983.0000001));
1048
1049 assert(!nearEqual(-0.0007370567f, -0.0007372567f));
1050 assert( nearEqual(-0.0007370567f, -0.0007371567f));
1051
1052 assert(!nearEqual(-0.0000007370567, -0.000000737056701));
1053 assert( nearEqual(-0.0000007370567, -0.0000007370567001));
1054
1055 // More near zero tests
1056 {
1057 float a = 1.0f;
1058 float b = 2.0f;
1059 float c = 3.0f;
1060 float x = (a + b) - c;
1061
1062 assert(nearEqual(x, 0.0f));
1063
1064 foreach (i; 0 .. 3)
1065 {
1066 // Divide by ten and repeat the comparisons.
1067 a /= 10.0f;
1068 b /= 10.0f;
1069 c /= 10.0f;
1070 x /= 10.0f;
1071
1072 float y = (a + b) - c;
1073
1074 assert(nearEqual(x, 0.0f));
1075 assert(nearEqual(y, 0.0f));
1076 assert(nearEqual(x, y));
1077 }
1078 }
1079 {
1080 double a = 1.0;
1081 double b = 2.0;
1082 double c = 3.0;
1083 double x = (a + b) - c;
1084
1085 assert(nearEqual(x, 0.0));
1086
1087 foreach (i; 0 .. 3)
1088 {
1089 // Divide by ten and repeat the comparisons.
1090 a /= 10.0;
1091 b /= 10.0;
1092 c /= 10.0;
1093 x /= 10.0;
1094
1095 double y = (a + b) - c;
1096
1097 assert(nearEqual(x, 0.0));
1098 assert(nearEqual(y, 0.0));
1099 assert(nearEqual(x, y));
1100 }
1101 }
1102
1103 // Double to double to float round trip. Use float tolerances.
1104 // Note: Don't use immutables. Compiler will preserve doubles.
1105 {
1106 import std.conv : to;
1107
1108 double q = 31.79;
1109 float rf = q;
1110 double r = rf;
1111
1112 assert(!nearEqual(q, r));
1113 assert(nearEqual(q, r, defaultMaxRelDiffFloat.to!double));
1114 }
1115 }