tsv_utils.tsv_pretty source code

1 /**
2 Command line tool that prints TSV data aligned for easier reading on consoles
3 and traditional command-line environments.
4 
5 Copyright (c) 2017-2020, eBay Inc.
6 Initially written by Jon Degenhardt
7 
8 License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
9 */
10 module tsv_utils.tsv_pretty;
11 
12 import std.range;
13 import std.stdio;
14 import std.typecons : Flag, Yes, No, tuple;
15 
/* Embeds a druntime option string in the executable through the extern(C) rt_options
 * symbol. Only declared for compiler front-end versions 2.085 and later.
 * NOTE(review): "gcopt=cleanup:none" presumably tells the garbage collector to skip
 * its cleanup pass at program termination - confirm against the druntime GC
 * configuration documentation.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
17 
version(unittest)
{
    // Unit test builds take their entry point from the compiler's -main switch.
}
else
{
    /** Program entry point.
     *
     * Parses the command line into a TsvPrettyOptions instance and, when parsing
     * says to continue, runs tsvPretty over the remaining arguments (the input
     * files). Exceptions raised while processing are reported on stderr.
     *
     * Returns: The exit code chosen by argument processing when it stops early,
     * 1 if processing threw an Exception, 0 otherwise.
     */
    int main(string[] cmdArgs)
    {
        /* Merge coverage reports across runs when built with DMD code coverage. */
        version(D_Coverage) version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }

        TsvPrettyOptions options;
        auto argsStatus = options.processArgs(cmdArgs);
        immutable bool continueExecution = argsStatus[0];
        if (!continueExecution)
        {
            return argsStatus[1];
        }

        try
        {
            /* cmdArgs[0] is the program name; what follows are the input files. */
            tsvPretty(options, cmdArgs[1 .. $]);
        }
        catch (Exception exc)
        {
            stderr.writefln("Error [%s]: %s", options.programName, exc.msg);
            return 1;
        }

        return 0;
    }
}
48 
/** Introductory text printed by --help-verbose, ahead of the generated option list. */
auto helpTextVerbose = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a format intended to be more human readable when
working on the command line. This is done primarily by lining up data into
fixed-width columns. Text is left aligned, numbers are right aligned. Floating
point numbers are aligned on the decimal point when feasible.

Processing begins by reading the initial set of lines into memory to determine
the field widths and data types of each column. This look-ahead buffer is used
for header detection as well. Output begins after this processing is complete.

By default, only the alignment is changed, the actual values are not modified.
Several of the formatting options do modify the values.

Features:

* Floating point numbers: Floats can be printed in fixed-width precision, using
  the same precision for all floats in a column. This makes them line up nicely.
  Precision is determined by values seen during look-ahead processing. The max
  precision defaults to 9, this can be changed when smaller or larger values are
  desired. See the '--f|format-floats' and '--p|precision' options.

* Header lines: Headers are detected automatically when possible. This can be
  overridden when automatic detection doesn't work as desired. Headers can be
  underlined and repeated at regular intervals.

* Missing values: A substitute value can be used for empty fields. This is often
  less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'.

* Exponential notation: As part of float formatting, '--f|format-floats'
  re-formats columns where exponential notation is found so all the values in
  the column are displayed using exponential notation with the same precision.

* Preamble: A number of initial lines can be designated as a preamble and output
  unchanged. The preamble is before the header, if a header is present. Preamble
  lines can be auto-detected via the heuristic that they lack field delimiters.
  This works well when the field delimiter is a TAB.

* Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double
  width. This is not always correct, but works well in most cases.

Options:
EOS";
93 
/** Introductory text printed by the standard help option, ahead of the generated option list. */
auto helpText = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a more human readable format. This is done by lining
up data into fixed-width columns. Text is left aligned, numbers are right aligned.
Floating point numbers are aligned on the decimal point when feasible.

Options:
EOS";
103 
/** TsvPrettyOptions is used to process and store command line options.
 *
 * Fields marked "Derived" are not set directly from an option; they are calculated
 * by processArgs after the command line has been parsed.
 */
struct TsvPrettyOptions
{
    string programName;
    bool helpVerbose = false;           // --help-verbose
    bool hasHeader = false;             // --H|header (Note: Default false assumed by validation code)
    bool autoDetectHeader = true;       // Derived (Note: Default true assumed by validation code)
    bool noHeader = false;              // --x|no-header (Note: Default false assumed by validation code)
    size_t lookahead = 1000;            // --l|lookahead
    size_t repeatHeader = 0;            // --r|repeat-header num (zero means no repeat)
    bool underlineHeader = false;       // --u|underline-header
    bool formatFloats = false;          // --f|format-floats
    size_t floatPrecision = 9;          // --p|precision num (max precision when formatting floats.)
    bool replaceEmpty = false;          // --e|replace-empty
    string emptyReplacement = "";       // --E|empty-replacement
    size_t emptyReplacementPrintWidth = 0;    // Derived
    char delim = '\t';                  // --d|delimiter
    size_t spaceBetweenFields = 2;      // --s|space-between-fields num
    size_t maxFieldPrintWidth = 40;     // --m|max-text-width num; Max width for variable width text fields.
    bool autoDetectPreamble = false;    // --a|auto-preamble
    size_t preambleLines = 0;           // --b|preamble; Number of preamble lines.
    bool versionWanted = false;         // --V|version

    /** Parses and validates the command line.
     *
     * Params:
     *   cmdArgs = The program arguments, program name first. Recognized options are
     *             consumed by getopt; the program name and non-option arguments remain.
     *
     * Returns: A tuple. The first value is true if the arguments were successfully
     * processed and execution should continue, or false if an error occurred or the
     * user asked for help or version information. When false, the second value is
     * the exit code to use (0 for help/version, 1 for an error).
     *
     * Returning true means the options have been validated and the derived values
     * set: autoDetectHeader is turned off when either --H|header or --x|no-header is
     * given, --e|replace-empty and --E|empty-replacement are reconciled, and
     * emptyReplacementPrintWidth is calculated.
     *
     * Errors, including exceptions thrown by getopt or by the option handler, are
     * reported on stderr rather than propagated.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options
            auto r = getopt(
                cmdArgs,
                "help-verbose",           "       Print full help.", &helpVerbose,
                std.getopt.config.caseSensitive,
                "H|header",               "       Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "x|no-header",            "       Assume no header. Turns off automatic header detection.", &noHeader,
                "l|lookahead",            "NUM    Lines to read to interpret data before generating output. Default: 1000", &lookahead,

                "r|repeat-header",        "NUM    Lines to print before repeating the header. Default: No repeating header", &repeatHeader,

                "u|underline-header",     "       Underline the header.", &underlineHeader,
                "f|format-floats",        "       Format floats for better readability. Default: No", &formatFloats,
                "p|precision",            "NUM    Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler,
                std.getopt.config.caseSensitive,
                "e|replace-empty",        "       Replace empty fields with '--'.", &replaceEmpty,
                "E|empty-replacement",    "STR    Replace empty fields with a string.", &emptyReplacement,
                std.getopt.config.caseInsensitive,
                "d|delimiter",            "CHR    Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,
                "s|space-between-fields", "NUM    Spaces between each field (Default: 2)", &spaceBetweenFields,
                "m|max-text-width",       "NUM    Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth,
                "a|auto-preamble",        "       Treat initial lines in a file as a preamble if the line contains no field delimiters.", &autoDetectPreamble,
                "b|preamble",             "NUM    Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines,
                std.getopt.config.caseSensitive,
                "V|version",              "       Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            /* Informational requests: print and stop with a success exit code. */
            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-pretty"));
                return tuple(false, 0);
            }

            /* Validation and derivations. */
            if (noHeader && hasHeader) throw new Exception("Cannot specify both --H|header and --x|no-header.");

            /* An explicit header choice disables automatic header detection. */
            if (noHeader || hasHeader) autoDetectHeader = false;

            /* Zero look-ahead has limited utility unless the first line is known to
             * be a header. Good chance the user will get an unintended behavior.
             */
            if (lookahead == 0 && autoDetectHeader)
            {
                assert (!noHeader && !hasHeader);
                throw new Exception("Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'.");
            }

            if (autoDetectPreamble && preambleLines != 0)
            {
                throw new Exception("Do not use '--b|preamble NUM' and '--a|auto-preamble' together. ('--b|preamble 0' is okay.)");
            }

            /* A replacement string implies --e|replace-empty; --e|replace-empty
             * without a replacement string uses "--".
             */
            if (emptyReplacement.length != 0) replaceEmpty = true;
            else if (replaceEmpty) emptyReplacement = "--";

            if (emptyReplacement.length != 0)
            {
                emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth;
            }
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }

    /* Option handler for --p|precision. Converts the option value to floatPrecision
     * and also turns on --f|format-floats. A value that cannot be converted to
     * size_t makes std.conv.to throw; processArgs reports that as an argument error.
     */
    private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure
    {
        import std.conv : to;
        floatPrecision = optionVal.to!size_t;
        formatFloats = true;
    }
}
234 
/** tsvPretty is the main loop. It reads the input files (standard input when no
 * files are given or when a file is named "-") and hands each line to a
 * TsvPrettyProcessor instance.
 *
 * This keeps physical I/O sources and sinks apart from the processing algorithm,
 * which works on generic output ranges. A fresh lockingTextWriter on stdout is
 * wrapped in an output range object for every input line, and once more for the
 * final call to finish.
 *
 * Preamble lines are identified here as well, which keeps the TsvPrettyProcessor
 * code simpler. With --a|auto-preamble, lines are preamble until the first line
 * containing the field delimiter; otherwise the first options.preambleLines lines
 * of each file are preamble.
 */
void tsvPretty(const ref TsvPrettyOptions options, const string[] files)
{
    import std.algorithm : canFind;

    /* How an input line is presented to the processor. */
    enum LineKind { preamble, fileFirstLine, regular }

    immutable size_t firstDataLineNum = options.preambleLines + 1;
    auto processor = TsvPrettyProcessor(options);

    foreach (filename; (files.length > 0) ? files : ["-"])
    {
        auto inputStream = (filename == "-") ? stdin : filename.File();

        /* Only consulted in auto-detect mode: set once a delimited line has been seen. */
        bool preambleEnded = false;

        foreach (lineNum, line; inputStream.byLine.enumerate(1))
        {
            LineKind kind = LineKind.regular;

            if (options.autoDetectPreamble)
            {
                if (!preambleEnded)
                {
                    preambleEnded = line.canFind(options.delim);
                    kind = preambleEnded ? LineKind.fileFirstLine : LineKind.preamble;
                }
            }
            else if (lineNum < firstDataLineNum)
            {
                kind = LineKind.preamble;
            }
            else if (lineNum == firstDataLineNum)
            {
                kind = LineKind.fileFirstLine;
            }

            auto sink = outputRangeObject!(char, char[])(stdout.lockingTextWriter);

            final switch (kind)
            {
            case LineKind.preamble:
                processor.processPreambleLine(sink, line);
                break;

            case LineKind.fileFirstLine:
                processor.processFileFirstLine(sink, line);
                break;

            case LineKind.regular:
                processor.processLine(sink, line);
                break;
            }
        }
    }

    processor.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter));
}
302 
/** TsvPrettyProcessor maintains state of processing and exposes operations for
 * processing individual input lines.
 *
 * TsvPrettyProcessor knows that input is file-based, but doesn't deal with actual
 * files or reading lines from input. That is the job of the caller. Output is
 * written to an output range. The caller is expected to pass each line in the
 * order received; that assumption is built into the processing.
 *
 * Data lines are held in a look-ahead cache while field types and widths are being
 * determined. Nothing except preamble lines is written until the cache is flushed,
 * either because it reached the configured look-ahead size or because input ended.
 *
 * In addition to the constructor, there are four API methods:
 *  - processPreambleLine - Called to process a preamble line occurring before
 *    the header line or first line of data.
 *  - processFileFirstLine - Called to process the first line of each file. This
 *    enables header processing.
 *  - processLine - Called to process all lines except for the first line of a file.
 *  - finish - Called at the end of all processing. This is needed in case the
 *    look-ahead cache is still being filled when input terminates.
 */

struct TsvPrettyProcessor
{
    import std.array : appender;

private:
    /* Outcome of automatic header detection; 'none' means not yet decided. */
    private enum AutoDetectHeaderResult { none, hasHeader, noHeader };

    private TsvPrettyOptions _options;
    private size_t _fileCount = 0;              // Files for which a first line has been seen
    private size_t _dataLineOutputCount = 0;    // Data lines written; drives header repetition
    private bool _stillCaching = true;          // True until the look-ahead cache is flushed
    private string _candidateHeaderLine;        // First line of the first file, in auto-detect mode
    private auto _lookaheadCache = appender!(string[])();
    private FieldFormat[] _fieldVector;         // One entry per column seen so far
    private AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none;

    /** Constructor. Caching is skipped entirely when there is no header and the
     * look-ahead is zero, since there is then nothing to wait for.
     */
    this(const TsvPrettyOptions options) @safe pure nothrow @nogc
    {
        _options = options;
        if (options.noHeader && options.lookahead == 0) _stillCaching = false;
    }

    invariant
    {
        /* Exactly how the header is handled must have been decided by the options. */
        assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader);

        /* The cache is flushed as soon as it reaches the look-ahead size. */
        assert((_options.lookahead == 0 && _lookaheadCache.data.length == 0) ||
               _lookaheadCache.data.length < _options.lookahead);
    }

    /** Called to process a preamble line occurring before the header line or first
     * line of data. The line is written unchanged, but only while no file's first
     * (non-preamble) line has been processed yet; later preamble lines are dropped.
     */
    void processPreambleLine(OutputRange!char outputStream, const char[] line)
    {
        if (_fileCount == 0)
        {
            put(outputStream, line);
            put(outputStream, '\n');
        }
    }

    /** Called to process the first line of each file. This enables header processing.
     *
     * Depending on the header mode the line is treated as data (--x|no-header), as
     * the header (--H|header; only the first file's header is used, later ones are
     * skipped), or as a header candidate (auto-detect).
     */
    void processFileFirstLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        _fileCount++;

        if (_options.noHeader)
        {
            processLine(outputStream, line);
        }
        else if (_options.hasHeader)
        {
            if (_fileCount == 1)
            {
                setHeaderLine(line);

                /* With zero look-ahead there is nothing to cache; emit the header now. */
                if (_options.lookahead == 0) outputLookaheadCache(outputStream);
            }
        }
        else
        {
            assert(_options.autoDetectHeader);

            final switch (_autoDetectHeaderResult)
            {
            case AutoDetectHeaderResult.noHeader:
                /* Already decided there is no header: the line is data. */
                assert(_fileCount > 1);
                processLine(outputStream, line);
                break;

            case AutoDetectHeaderResult.hasHeader:
                /* Already decided there is a header: skip this file's copy. */
                assert(_fileCount > 1);
                break;

            case AutoDetectHeaderResult.none:
                if (_fileCount == 1)
                {
                    /* Remember the line; the decision is made later. */
                    assert(_candidateHeaderLine.length == 0);
                    _candidateHeaderLine = line.to!string;
                }
                else if (_fileCount == 2)
                {
                    /* Still undecided when the second file starts: identical first
                     * lines in both files are taken to be a header.
                     */
                    if (_candidateHeaderLine == line)
                    {
                        _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                        setHeaderLine(_candidateHeaderLine);

                        /* Edge case: First file has only a header line and look-ahead set to zero. */
                        if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream);
                    }
                    else
                    {
                        _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
                        updateFieldFormatsForLine(_candidateHeaderLine);
                        processLine(outputStream, line);
                    }
                }
                break;
            }
        }
    }

    /** Called to process all lines except for the first line of a file. The line is
     * cached while look-ahead is in progress and written directly afterwards.
     */
    void processLine(OutputRange!char outputStream, const char[] line)
    {
        if (_stillCaching) cacheDataLine(outputStream, line);
        else outputDataLine(outputStream, line);
    }

    /** Called at the end of all processing. This is needed in case the look-ahead cache
     * is still being filled when input terminates.
     */
    void finish(OutputRange!char outputStream)
    {
        if (_stillCaching) outputLookaheadCache(outputStream);
    }

private:
    /* outputLookaheadCache finalizes processing of the look-ahead cache. This includes
     * setting the type and width of each field, finalizing the auto-detect header
     * decision, and outputting all lines in the cache. Caching ends here.
     */
    void outputLookaheadCache(OutputRange!char outputStream)
    {
        assert(_stillCaching);

        /* Make the auto-detect decision if it is still open and there is a candidate.
         * NOTE(review): an empty first line has length zero and so is indistinguishable
         * from "no candidate" in the checks below - confirm that is intended.
         */
        if (_options.autoDetectHeader &&
            _autoDetectHeaderResult == AutoDetectHeaderResult.none &&
            _candidateHeaderLine.length != 0)
        {
            if (candidateHeaderLooksLikeHeader())
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                setHeaderLine(_candidateHeaderLine);
            }
            else
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
            }
        }


        if (_options.hasHeader ||
            (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader))
        {
            finalizeFieldFormatting();
            outputHeader(outputStream);
        }
        else if (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader &&
                 _candidateHeaderLine.length != 0)
        {
            /* The candidate turned out to be data: include it in the format
             * calculations and write it as the first data line.
             */
            updateFieldFormatsForLine(_candidateHeaderLine);
            finalizeFieldFormatting();
            outputDataLine(outputStream, _candidateHeaderLine);
        }
        else
        {
            finalizeFieldFormatting();
        }

        foreach(line; _lookaheadCache.data) outputDataLine(outputStream, line);
        _lookaheadCache.clear;
        _stillCaching = false;
    }

    /* Returns true if the candidate header line should be treated as a header, judged
     * against the field types gathered from the look-ahead cache.
     */
    bool candidateHeaderLooksLikeHeader() @safe
    {
        import std.algorithm : splitter;

        /* The candidate header is declared as the header if the look-ahead cache has at least
         * one numeric field that is text in the candidate header.
         */
        foreach(fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate)
        {
            /* Classify the candidate's field on its own, using a scratch FieldFormat. */
            auto candidateFieldFormat = FieldFormat(fieldIndex);
            candidateFieldFormat.updateForFieldValue(fieldValue, _options);
            if (_fieldVector.length > fieldIndex &&
                candidateFieldFormat.fieldType == FieldType.text &&
                (_fieldVector[fieldIndex].fieldType == FieldType.integer ||
                 _fieldVector[fieldIndex].fieldType == FieldType.floatingPoint ||
                 _fieldVector[fieldIndex].fieldType == FieldType.exponent))
            {
                return true;
            }
        }

        return false;
    }

    /* Records the header text of each field in the line, adding FieldFormat entries
     * for columns not seen before.
     */
    void setHeaderLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach(fieldIndex, header; line.splitter(_options.delim).enumerate)
        {
            if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
            assert(_fieldVector.length > fieldIndex);
            _fieldVector[fieldIndex].setHeader(header);
        }
    }

    /* Adds a data line to the look-ahead cache and updates the field formats with its
     * values. Flushes the cache once it reaches the look-ahead size.
     */
    void cacheDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        assert(_lookaheadCache.data.length < _options.lookahead);

        /* The caller's buffer may be reused for the next line, so keep a copy. */
        _lookaheadCache ~= line.to!string;
        updateFieldFormatsForLine(line);
        if (_lookaheadCache.data.length == _options.lookahead) outputLookaheadCache(outputStream);
    }

    /* Feeds each field value of the line to the FieldFormat for its column, adding
     * FieldFormat entries for columns not seen before.
     */
    void updateFieldFormatsForLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
            assert(_fieldVector.length > fieldIndex);
            _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
        }

    }

    /* Finalizes the format of every field, left to right. Each field starts where the
     * previous finalizeFormatting result plus the inter-field spacing puts it.
     */
    void finalizeFieldFormatting() @safe pure @nogc nothrow
    {
        size_t nextFieldStart = 0;
        foreach(ref field; _fieldVector)
        {
            nextFieldStart = field.finalizeFormatting(nextFieldStart, _options) + _options.spaceBetweenFields;
        }
    }

    /* Writes the header line, followed by an underline row when --u|underline-header
     * is in effect.
     */
    void outputHeader(OutputRange!char outputStream)
    {
        outputHeaderRow!(No.writeUnderline)(outputStream);
        if (_options.underlineHeader) outputHeaderRow!(Yes.writeUnderline)(outputStream);
    }

    /* Writes a single header row, either the header text or the underline characters
     * depending on the template flag. Each field is preceded by the spaces needed to
     * reach its start position.
     */
    void outputHeaderRow(Flag!"writeUnderline" writeUnderline)(OutputRange!char outputStream)
    {
        size_t nextOutputPosition = 0;
        foreach(ref field; _fieldVector)
        {
            size_t spacesNeeded = field.startPosition - nextOutputPosition;
            put(outputStream, repeat(" ", spacesNeeded));
            nextOutputPosition += spacesNeeded;
            nextOutputPosition += field.writeHeader!(writeUnderline)(outputStream, _options);
        }
        put(outputStream, '\n');
    }

    /* Writes one data line, aligning each field according to its FieldFormat. Also
     * takes care of repeating the header at the configured interval.
     */
    void outputDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.algorithm : splitter;

        /* Repeating header option. */
        if (_options.repeatHeader != 0 && _dataLineOutputCount != 0 &&
            (_options.hasHeader || (_options.autoDetectHeader &&
                                    _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)) &&
            _dataLineOutputCount % _options.repeatHeader == 0)
        {
            put(outputStream, '\n');
            outputHeader(outputStream);
        }

        _dataLineOutputCount++;

        size_t nextOutputPosition = 0;
        foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                /* Line is longer than any seen while caching. Add a new FieldFormat entry
                 * and set the line formatting based on this field value.
                 */
                _fieldVector ~= FieldFormat(fieldIndex);
                size_t startPosition = (fieldIndex == 0) ?
                    0 :
                    _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields;

                _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
                _fieldVector[fieldIndex].finalizeFormatting(startPosition, _options);
            }

            assert(fieldIndex < _fieldVector.length);

            FieldFormat fieldFormat = _fieldVector[fieldIndex];
            size_t nextFieldStart = fieldFormat.startPosition;
            size_t spacesNeeded = (nextOutputPosition < nextFieldStart) ?
                nextFieldStart - nextOutputPosition :
                (fieldIndex == 0) ? 0 : 1;  // Previous field went long. One space between fields

            put(outputStream, repeat(" ", spacesNeeded));
            nextOutputPosition += spacesNeeded;
            nextOutputPosition += fieldFormat.writeFieldValue(outputStream, nextOutputPosition, fieldValue, _options);
        }
        put(outputStream, '\n');
    }
}
633 
/** The kinds of column content tsv-pretty distinguishes and tracks. */
enum FieldType
{
    unknown,
    text,
    integer,
    floatingPoint,
    exponent,
}
636 
/** The alignments tsv-pretty can give a field. */
enum FieldAlignment
{
    left,
    right,
}
639 
640 /** FieldFormat holds all the formatting info needed to format data values in a specific
641  * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated
642  * during the caching phase. Each FieldFormat instance is part of a vector representing
643  * the full row, so each includes the start position on the line and similar data.
644  *
645  * APIs used during the caching phase to gather field value samples
646  *  - this - Initial construction. Takes the field index.
647  *  - setHeader - Used to set the header text.
648  *  - updateForFieldValue - Used to add the next field value sample.
649  *  - finalizeFormatting - Used at the end of caching to finalize the format choices.
650  *
651  * APIs used after caching is finished (after finalizeFormatting):
652  *  - startPosition - Returns the expected start position for the field.
653  *  - endPosition - Returns the expected end position for the field.
654  *  - writeHeader - Outputs the header, properly aligned.
655  *  - writeFieldValue - Outputs the current field value, properly aligned.
656  */
657 
658 struct FieldFormat
659 {
660 private:
661     size_t _fieldIndex;                  // Zero-based index in the line
662     string _header = "";                 // Original field header
663     size_t _headerPrintWidth = 0;
664     FieldType _type = FieldType.unknown;
665     FieldAlignment _alignment = FieldAlignment.left;
666     size_t _startPosition = 0;
667     size_t _printWidth = 0;
668     size_t _precision = 0;          // Number of digits after the decimal point
669 
670     /* These are used while doing initial type and print format detection. */
671     size_t _minRawPrintWidth = 0;
672     size_t _maxRawPrintWidth = 0;
673     size_t _maxDigitsBeforeDecimal = 0;
674     size_t _maxDigitsAfterDecimal = 0;
675     size_t _maxSignificantDigits = 0;  // Digits to include in exponential notation
676 
677 public:
678 
679     /** Initial construction. Takes a field index. */
680     this(size_t fieldIndex) @safe pure nothrow @nogc
681     {
682         _fieldIndex = fieldIndex;
683     }
684 
685     /** Sets the header text. */
686     void setHeader(const char[] header) @safe
687     {
688         import std.conv : to;
689 
690         _header = header.to!string;
691         _headerPrintWidth = _header.monospacePrintWidth;
692     }
693 
694     /** Returns the expected start position for the field. */
695     size_t startPosition() nothrow pure @safe @property
696     {
697         return _startPosition;
698     }
699 
700     /** Returns the expected end position for the field. */
701     size_t endPosition() nothrow pure @safe @property
702     {
703         return _startPosition + _printWidth;
704     }
705 
706     /** Returns the type of field. */
707     FieldType fieldType() nothrow pure @safe @property
708     {
709         return _type;
710     }
711 
712     /** Writes the field header or underline characters to the output stream.
713      *
714      * The current output position should have been written up to the field's start position,
715      * including any spaces between fields. Unlike data fields, there is no need to correct
716      * for previous fields that have run long. This routine does not output trailing spaces.
717      * This makes it simpler for lines to avoid unnecessary trailing spaces.
718      *
719      * Underlines can either be written the full width of the field or the just under the
720      * text of the header. At present this is a template parameter (compile-time).
721      *
722      * The print width of the output is returned.
723      */
724     size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline,
725                         Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline)
726         (OutputRange!char outputStream, const ref TsvPrettyOptions options)
727     {
728         import std.range : repeat;
729 
730         size_t positionsWritten = 0;
731         if (_headerPrintWidth > 0)
732         {
733             static if (writeUnderline)
734             {
735                 static if (fullWidthUnderline)
736                 {
737                     put(outputStream, repeat("-", _printWidth));
738                     positionsWritten += _printWidth;
739                 }
740                 else  // Underline beneath the header text only
741                 {
742                     if (_alignment == FieldAlignment.right)
743                     {
744                         put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
745                         positionsWritten += _printWidth - _headerPrintWidth;
746                     }
747                     put(outputStream, repeat("-", _headerPrintWidth));
748                     positionsWritten += _headerPrintWidth;
749                 }
750             }
751             else
752             {
753                 if (_alignment == FieldAlignment.right)
754                 {
755                     put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
756                     positionsWritten += _printWidth - _headerPrintWidth;
757                 }
758                 put(outputStream, _header);
759                 positionsWritten += _headerPrintWidth;
760             }
761         }
762         return positionsWritten;
763     }
764 
765     /** Writes the field value for the current column.
766      *
767      * The caller needs to generate output at least to the column's start position, but
768      * can go beyond if previous fields have run long.
769      *
770      * The field value is aligned properly in the field. Either left aligned (text) or
771      * right aligned (numeric). Floating point fields are both right aligned and
     * decimal point aligned. The print width of the output is returned. Trailing spaces
773      * are not added, the caller must add any necessary trailing spaces prior to
774      * printing the next field.
775      */
776     size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition,
777                            const char[] fieldValue, in ref TsvPrettyOptions options)
778     in
779     {
780         assert(currPosition >= _startPosition);   // Caller resposible for advancing to field start position.
781         assert(_type == FieldType.text || _type == FieldType.integer ||
782                _type == FieldType.floatingPoint || _type == FieldType.exponent);
783     }
784     do
785     {
786         import std.algorithm : find, max, min;
787         import std.conv : to, ConvException;
788         import std.format : format;
789 
790         /* Create the print version of the string. Either the raw value or a formatted
791          * version of a float.
792          */
793         string printValue;
794         if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer)
795         {
796             printValue = fieldValue.to!string;
797         }
798         else
799         {
800             assert(options.formatFloats);
801             assert(_type == FieldType.exponent || _type == FieldType.floatingPoint);
802 
803             if (_type == FieldType.exponent)
804             {
805                 printValue = fieldValue.formatExponentValue(_precision);
806             }
807             else
808             {
809                 printValue = fieldValue.formatFloatingPointValue(_precision);
810             }
811         }
812 
813         if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement;
814         size_t printValuePrintWidth = printValue.monospacePrintWidth;
815 
816         /* Calculate leading spaces needed for right alignment. */
817         size_t leadingSpaces = 0;
818         if (_alignment == FieldAlignment.right)
819         {
820             /* Target width adjusts the column width to account for overrun by the previous field. */
821             size_t targetWidth;
822             if (currPosition == _startPosition)
823             {
824                 targetWidth = _printWidth;
825             }
826             else
827             {
828                 size_t startGap = currPosition - _startPosition;
829                 targetWidth = max(printValuePrintWidth,
830                                   startGap < _printWidth ? _printWidth - startGap : 0);
831             }
832 
833             leadingSpaces = (printValuePrintWidth < targetWidth) ?
834                 targetWidth - printValuePrintWidth : 0;
835 
836             /* The above calculation assumes the print value is fully right aligned.
837              * This is not correct when raw value floats are being used rather than
838              * formatted floats, as different values will have different precision.
839              * The next adjustment accounts for this, dropping leading spaces as
840              * needed to align the decimal point. Note that text and exponential
841              * values get aligned strictly against right boundaries.
842              */
843             if (leadingSpaces > 0 && _precision > 0 &&
844                 _type == FieldType.floatingPoint && !options.formatFloats)
845             {
846                 import std.algorithm : canFind, findSplit;
847                 import std.string : isNumeric;
848 
849                 if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E'))
850                 {
851                     size_t decimalAndDigitsLength = printValue.find(".").length;
852                     size_t trailingSpaces =
853                         (decimalAndDigitsLength == 0) ? _precision + 1 :
854                         (decimalAndDigitsLength > _precision) ? 0 :
855                         _precision + 1 - decimalAndDigitsLength;
856 
857                     leadingSpaces = (leadingSpaces > trailingSpaces) ?
858                         leadingSpaces - trailingSpaces : 0;
859                 }
860             }
861         }
862         put(outputStream, repeat(' ', leadingSpaces));
863         put(outputStream, printValue);
864         return printValuePrintWidth + leadingSpaces;
865     }
866 
867     /** Updates type and format given a new field value.
868      *
869      * This is called during look-ahead caching to register a new sample value for the
     * column. The key components updated are the field type and print width.
871      */
872     void updateForFieldValue(const char[] fieldValue, const ref TsvPrettyOptions options) @safe
873     {
874         import std.algorithm : findAmong, findSplit, max, min;
875         import std.conv : to, ConvException;
876         import std.string : isNumeric;
877 
878         size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth;
879         size_t fieldValuePrintWidthWithEmpty =
880             (fieldValuePrintWidth == 0 && options.replaceEmpty) ?
881             options.emptyReplacementPrintWidth :
882             fieldValuePrintWidth;
883 
884         _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty);
885         _minRawPrintWidth = (_minRawPrintWidth == 0) ?
886             fieldValuePrintWidthWithEmpty :
887             min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty);
888 
889         if (_type == FieldType.text)
890         {
891             /* Already text, can't become anything else. */
892         }
893         else if (fieldValuePrintWidth == 0)
894         {
895             /* Don't let an empty field override a numeric field type. */
896         }
897         else if (!fieldValue.isNumeric)
898         {
899             /* Not parsable as a number. Switch from unknown or numeric type to text. */
900             _type = FieldType.text;
901         }
902         else
903         {
904             /* Field type is currently unknown or numeric, and current field parses as numeric.
905              * See if it parses as integer or float. Integers will parse as floats, so try
906              * integer types first.
907              */
908             FieldType parsesAs = FieldType.unknown;
909             long longValue;
910             ulong ulongValue;
911             double doubleValue;
912             try
913             {
914                 longValue = fieldValue.to!long;
915                 parsesAs = FieldType.integer;
916             }
917             catch (ConvException)
918             {
919                 try
920                 {
921                     ulongValue = fieldValue.to!ulong;
922                     parsesAs = FieldType.integer;
923                 }
924                 catch (ConvException)
925                 {
926                     try
927                     {
928                         doubleValue = fieldValue.to!double;
929                         import std.algorithm : findAmong;
930                         parsesAs = (fieldValue.findAmong("eE").length == 0) ?
931                             FieldType.floatingPoint : FieldType.exponent;
932                     }
933                     catch (ConvException)
934                     {
935                         /* Note: This means isNumeric thinks it's a number, but conversions all failed. */
936                         parsesAs = FieldType.text;
937                     }
938                 }
939             }
940 
941             if (parsesAs == FieldType.text)
942             {
943                 /* Not parsable as a number (despite isNumeric result). Switch to text type. */
944                 _type = FieldType.text;
945             }
946             else if (parsesAs == FieldType.exponent)
947             {
948                 /* Exponential notion supersedes both vanilla floats and integers. */
949                 _type = FieldType.exponent;
950                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
951 
952                 if (auto decimalSplit = fieldValue.findSplit("."))
953                 {
954                     auto fromExponent = decimalSplit[2].findAmong("eE");
955                     size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length;
956                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
957                     _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal);
958                 }
959                 else
960                 {
961                     /* Exponent without a decimal point. */
962                     auto fromExponent = fieldValue.findAmong("eE");
963                     assert(fromExponent.length > 0);
964                     size_t numDigits = fieldValue.length - fromExponent.length;
965                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits);
966                 }
967             }
968             else if (parsesAs == FieldType.floatingPoint)
969             {
970                 /* Floating point supercedes integer but not exponential. */
971                 if (_type != FieldType.exponent) _type = FieldType.floatingPoint;
972                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
973 
974                 if (auto decimalSplit = fieldValue.findSplit("."))
975                 {
976                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
977                     _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length);
978                 }
979             }
980             else
981             {
982                 assert(parsesAs == FieldType.integer);
983                 if (_type != FieldType.floatingPoint) _type = FieldType.integer;
984                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
985                 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length);
986             }
987         }
988     }
989 
990     /** Updates field formatting info based on the current state. It is expected to be
991      * called after adding field entries via updateForFieldValue(). It returns its new
992      * end position.
993      */
994     size_t finalizeFormatting (size_t startPosition, const ref TsvPrettyOptions options) @safe pure @nogc nothrow
995     {
996         import std.algorithm : max, min;
997         _startPosition = startPosition;
998         if (_type == FieldType.unknown) _type = FieldType.text;
999         _alignment = (_type == FieldType.integer || _type == FieldType.floatingPoint
1000                       || _type == FieldType.exponent) ?
1001             FieldAlignment.right :
1002             FieldAlignment.left;
1003 
1004         if (_type == FieldType.floatingPoint)
1005         {
1006             size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal);
1007             size_t maxValueWidth = _maxDigitsBeforeDecimal + precision;
1008             if (precision > 0) maxValueWidth++;  // Account for the decimal point.
1009             _printWidth = max(1, _headerPrintWidth, maxValueWidth);
1010             _precision = precision;
1011         }
1012         else if (_type == FieldType.exponent)
1013         {
1014             size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0;
1015             _precision = min(options.floatPrecision, maxPrecision);
1016 
1017             size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7;
1018             _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth);
1019         }
1020         else if (_type == FieldType.integer)
1021         {
1022             _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth);
1023             _precision = 0;
1024         }
1025         else
1026         {
1027             _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth,
1028                               min(options.maxFieldPrintWidth, _maxRawPrintWidth));
1029             _precision = 0;
1030         }
1031 
1032         return _startPosition + _printWidth;
1033     }
1034 }
1035 
1036 /** formatFloatingPointValue returns the printed representation of a raw value
1037  * formatted as a fixed precision floating number. This includes zero padding or
1038  * truncation of trailing digits as necessary to meet the desired precision.
1039  *
1040  * If the value cannot be interpreted as a double then the raw value is returned.
 * Similarly, values in exponential notation are returned without reformatting.
1042  *
1043  * This routine is used to format values in columns identified as floating point.
1044  */
string formatFloatingPointValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    /* Exponential notation. Use the raw value. */
    if (value.canFind!(c => c == 'e' || c == 'E')) return value.to!string;

    try
    {
        immutable double number = value.to!double;
        if (!number.isFinite) return value.to!string;  // NaN or Infinity

        immutable size_t digitsPresent = value.precisionDigits;

        /* Enough digits in the input: format at the requested precision. */
        if (digitsPresent >= precision) return format("%.*f", precision, number);

        /* Too few digits in the input: format with the digits that are there,
         * then pad with literal zeros. A decimal point is added first when the
         * input had no fractional digits at all.
         */
        string formatted = format("%.*f", digitsPresent, number);
        if (digitsPresent == 0) formatted ~= ".";
        formatted ~= repeat("0", precision - digitsPresent).join;
        return formatted;
    }
    catch (ConvException)
    {
        /* Conversion failed: hand back a copy of the input. */
        return value.to!string;
    }
}
1088 
@safe unittest
{
    /* Inputs that do not convert to a finite number come back unchanged. */
    assert(formatFloatingPointValue("", 3) == "");
    assert(formatFloatingPointValue(" ", 3) == " ");
    assert(formatFloatingPointValue("abc", 3) == "abc");
    assert(formatFloatingPointValue("nan", 3) == "nan");

    /* Zero precision. */
    assert(formatFloatingPointValue("0", 0) == "0");
    assert(formatFloatingPointValue("1", 0) == "1");
    assert(formatFloatingPointValue("1.", 0) == "1");

    /* Zero padding, truncation, and rounding of positive values. */
    assert(formatFloatingPointValue("1", 3) == "1.000");
    assert(formatFloatingPointValue("1000", 3) == "1000.000");
    assert(formatFloatingPointValue("1000.001", 5) == "1000.00100");
    assert(formatFloatingPointValue("1000.001", 3) == "1000.001");
    assert(formatFloatingPointValue("1000.001", 2) == "1000.00");
    assert(formatFloatingPointValue("1000.006", 2) == "1000.01");

    /* Negative values. */
    assert(formatFloatingPointValue("-0.1", 1) == "-0.1");
    assert(formatFloatingPointValue("-0.1", 3) == "-0.100");
    assert(formatFloatingPointValue("-0.001", 3) == "-0.001");
    assert(formatFloatingPointValue("-0.006", 2) == "-0.01");
    assert(formatFloatingPointValue("-0.001", 1) == "-0.0");
    assert(formatFloatingPointValue("-0.001", 0) == "-0");

    /* Exponential notation is passed through without reformatting. */
    assert(formatFloatingPointValue("0e+00", 0) == "0e+00");
    assert(formatFloatingPointValue("0.00e+00", 0) == "0.00e+00");
    assert(formatFloatingPointValue("1e+06", 1) == "1e+06");
    assert(formatFloatingPointValue("1e+06", 2) == "1e+06");
    assert(formatFloatingPointValue("1E-06", 1) == "1E-06");
    assert(formatFloatingPointValue("1.1E+6", 2) == "1.1E+6");
    assert(formatFloatingPointValue("1.1E+100", 2) == "1.1E+100");
}
1118 
1119 /** formatExponentValue returns the printed representation of a raw value formatted
1120  * using exponential notation and a specific precision. If the value cannot be interpreted
 * as a double then a copy of the original value is returned.
1122  *
1123  * This routine is used to format values in columns identified as having exponent format.
1124  */
string formatExponentValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind, findSplit;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    try
    {
        immutable double number = value.to!double;
        if (!number.isFinite) return value.to!string;  // NaN or Infinity

        /* Digits after the mantissa's decimal point that the input supplies. */
        immutable size_t sigDigits = value.significantDigits;
        immutable size_t digitsPresent = (sigDigits == 0) ? 0 : sigDigits - 1;

        /* Enough digits in the input: format at the requested precision. */
        if (digitsPresent >= precision) return format("%.*e", precision, number);

        /* Too few digits in the input: format with the digits that are there,
         * then insert literal zeros between the mantissa and the exponent.
         */
        auto pieces = format("%.*e", digitsPresent, number).findSplit("e");   // format emits a lower-case 'e'.
        string mantissa = pieces[0];
        if (digitsPresent == 0)
        {
            assert(precision != 0);
            assert(!mantissa.canFind("."));
            mantissa ~= ".";
        }
        return mantissa ~ repeat("0", precision - digitsPresent).join ~ pieces[1] ~ pieces[2];
    }
    catch (ConvException)
    {
        /* Conversion failed: hand back a copy of the input. */
        return value.to!string;
    }
}
1168 
@safe unittest
{
    /* Inputs that do not convert to a finite number come back unchanged. */
    assert(formatExponentValue("", 3) == "");
    assert(formatExponentValue(" ", 3) == " ");
    assert(formatExponentValue("abc", 3) == "abc");
    assert(formatExponentValue("nan", 3) == "nan");

    /* Zero precision. */
    assert(formatExponentValue("0", 0) == "0e+00");
    assert(formatExponentValue("1", 0) == "1e+00");
    assert(formatExponentValue("1.", 0) == "1e+00");

    /* Plain decimal inputs converted to exponential notation. */
    assert(formatExponentValue("1", 3) == "1.000e+00");
    assert(formatExponentValue("1000", 3) == "1.000e+03");
    assert(formatExponentValue("1000.001", 5) == "1.00000e+03");
    assert(formatExponentValue("1000.001", 3) == "1.000e+03");
    assert(formatExponentValue("1000.001", 6) == "1.000001e+03");
    assert(formatExponentValue("1000.006", 5) == "1.00001e+03");

    /* Negative values. */
    assert(formatExponentValue("-0.1", 1) == "-1.0e-01");
    assert(formatExponentValue("-0.1", 3) == "-1.000e-01");
    assert(formatExponentValue("-0.001", 3) == "-1.000e-03");
    assert(formatExponentValue("-0.001", 1) == "-1.0e-03");
    assert(formatExponentValue("-0.001", 0) == "-1e-03");

    /* Inputs already in exponential notation. */
    assert(formatExponentValue("0e+00", 0) == "0e+00");
    assert(formatExponentValue("0.00e+00", 0) == "0e+00");
    assert(formatExponentValue("1e+06", 1) == "1.0e+06");
    assert(formatExponentValue("1e+06", 2) == "1.00e+06");
    assert(formatExponentValue("1.0001e+06", 1) == "1.0e+06");
    assert(formatExponentValue("1.0001e+06", 5) == "1.00010e+06");
}
1196 
1197 /** Returns the number of significant digits in a numeric string.
1198  *
1199  * Significant digits are those needed to represent a number in exponential notation.
1200  * Examples:
1201  *   22.345 - 5 digits
1202  *   10.010 - 4 digits
1203  *   0.0032 - 2 digits
1204  */
size_t significantDigits(const char[] numericString) @safe pure
{
    /* Counts the digits from the first non-zero mantissa digit onward. Trailing
     * zeros are dropped only when the mantissa contains a decimal point.
     *
     * Params:
     *   numericString = Text for which std.string.isNumeric is true.
     *
     * Returns: The digit count, at least 1 for any finite value, and 0 when the
     * value is not finite (NaN or infinity).
     */
    import std.algorithm : canFind, find, findAmong, stripRight;
    import std.ascii : isDigit;
    import std.math : isFinite;
    import std.string : isNumeric;
    import std.conv : to;

    assert (numericString.isNumeric);

    size_t significantDigits = 0;
    if (numericString.to!double.isFinite)
    {
        /* Remove the exponent BEFORE looking for the first non-zero digit.
         * Searching the full string lets a zero mantissa fall through to the
         * exponent's digits, e.g. "0e+10" was counted as 2 digits instead of 1.
         */
        auto exponentPart = numericString.findAmong("eE");
        auto mantissa = numericString[0 .. $ - exponentPart.length];

        /* Skips sign, leading zeros, and a leading decimal point. */
        auto digitsPart = mantissa.find!(x => x.isDigit && x != '0');

        if (digitsPart.canFind('.'))
        {
            /* Trailing zeros after a decimal point are not significant;
             * subtract one for the decimal point itself.
             */
            digitsPart = digitsPart.stripRight('0');
            significantDigits = digitsPart.length - 1;
        }
        else
        {
            significantDigits = digitsPart.length;
        }

        /* A value of zero still needs one digit to be written. */
        if (significantDigits == 0) significantDigits = 1;
    }

    return significantDigits;
}

@safe pure unittest
{
    assert("0".significantDigits == 1);
    assert("10".significantDigits == 2);
    assert("0.0".significantDigits == 1);
    assert("-10.0".significantDigits == 2);
    assert("-.01".significantDigits == 1);
    assert("-.5401".significantDigits == 4);
    assert("1010.010".significantDigits == 6);
    assert("0.0003003".significantDigits == 4);
    assert("6e+06".significantDigits == 1);
    assert("6.0e+06".significantDigits == 1);
    assert("6.5e+06".significantDigits == 2);
    assert("6.005e+06".significantDigits == 4);

    /* Zero mantissa: digits of the exponent must not be counted. */
    assert("0e+10".significantDigits == 1);
    assert("0.0e+15".significantDigits == 1);
    assert("0E-123".significantDigits == 1);
}
1253 
1254 /** Returns the number of digits to the right of the decimal point in a numeric string.
1255  * This routine includes trailing zeros in the count.
1256  */
size_t precisionDigits(const char[] numericString) @safe pure
{
    import std.algorithm : findAmong, findSplit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    assert (numericString.isNumeric);

    /* NaN and infinity report zero digits. */
    if (!numericString.to!double.isFinite) return 0;

    /* No decimal point means no fractional digits. */
    auto pieces = numericString.findSplit(".");
    if (pieces[1].length == 0) return 0;

    /* Everything after the decimal point, minus any exponent suffix. */
    auto afterPoint = pieces[2];
    return afterPoint.length - afterPoint.findAmong("eE").length;
}
1279 
@safe pure unittest
{
    /* No decimal point. */
    assert(precisionDigits("0") == 0);
    assert(precisionDigits("10") == 0);

    /* Trailing zeros after the decimal point are counted. */
    assert(precisionDigits("0.0") == 1);
    assert(precisionDigits("-10.0") == 1);

    /* Leading decimal point with a sign. */
    assert(precisionDigits("-.01") == 2);
    assert(precisionDigits("-.5401") == 4);
}
1289 
1290 /** Calculates the expected print width of a string in monospace (fixed-width) fonts.
1291  */
size_t monospacePrintWidth(const char[] str) @safe nothrow
{
    import std.uni : byGrapheme;

    size_t columns = 0;
    try
    {
        foreach (grapheme; str.byGrapheme)
        {
            /* A grapheme whose first code point lies in U+3000 .. U+9FFF is
             * counted as two columns, any other grapheme as one.
             */
            immutable dchar lead = grapheme[0];
            columns += (lead >= '\u3000' && lead <= '\u9fff') ? 2 : 1;
        }
    }
    catch (Exception)
    {
        /* Invalid utf-8 sequence. Fall back to the byte length rather than failing. */
        columns = str.length;
    }

    return columns;
}
1307 
unittest
{
    /* ASCII: one column per character. */
    assert(monospacePrintWidth("") == 0);
    assert(monospacePrintWidth(" ") == 1);
    assert(monospacePrintWidth("abc") == 3);

    /* Characters in the double-width range: two columns each. */
    assert(monospacePrintWidth("林檎") == 4);

    /* Multi-byte characters outside that range: one column each. */
    assert(monospacePrintWidth("æble") == 4);
    assert(monospacePrintWidth("ვაშლი") == 5);
    assert(monospacePrintWidth("größten") == 7);
}