tsv_utils.tsv_pretty source code

1 /**
2 Command line tool that prints TSV data aligned for easier reading on consoles
3 and traditional command-line environments.
4 
5 Copyright (c) 2017-2019, eBay Software Foundation
6 Initially written by Jon Degenhardt
7 
8 License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
9 */
10 module tsv_utils.tsv_pretty;
11 
12 import std.range;
13 import std.stdio;
14 import std.typecons : Flag, Yes, No, tuple;
15 
/* Embeds a D runtime option string in the executable. The declaration is only
 * compiled in when the compiler front-end version is 2.085 or later. The option
 * value asks the runtime's garbage collector to perform no cleanup step
 * ("gcopt=cleanup:none").
 */
static if (__VERSION__ >= 2085)
{
    extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
}
17 
version(unittest)
{
    // Unit test builds obtain main from the -main compiler switch.
}
else
{
    /** Program entry point.
     *
     * Processes the command line arguments into a TsvPrettyOptions instance, then
     * hands the remaining arguments (everything after the program name) to
     * tsvPretty as the list of input files.
     *
     * Returns: The exit code chosen by argument processing when it asks for
     * execution to stop, 1 if tsvPretty throws an Exception (the message is
     * written to stderr), and 0 otherwise.
     */
    int main(string[] cmdArgs)
    {
        /* Merge coverage reports when built by DMD in code coverage mode. */
        version(D_Coverage) version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }

        TsvPrettyOptions options;
        auto argStatus = options.processArgs(cmdArgs);
        if (!argStatus[0])
        {
            /* Help, version, or an argument error: stop with the supplied code. */
            return argStatus[1];
        }

        try
        {
            tsvPretty(options, cmdArgs[1 .. $]);
        }
        catch (Exception e)
        {
            stderr.writefln("Error [%s]: %s", options.programName, e.msg);
            return 1;
        }

        return 0;
    }
}
48 
/** Long-form help text. Printed ahead of the option list when --help-verbose
 * is given on the command line.
 */
auto helpTextVerbose = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a format intended to be more human readable when
working on the command line. This is done primarily by lining up data into
fixed-width columns. Text is left aligned, numbers are right aligned. Floating
point numbers are aligned on the decimal point when feasible.

Processing begins by reading the initial set of lines into memory to determine
the field widths and data types of each column. This look-ahead buffer is used
for header detection as well. Output begins after this processing is complete.

By default, only the alignment is changed, the actual values are not modified.
Several of the formatting options do modify the values.

Features:

* Floating point numbers: Floats can be printed in fixed-width precision, using
  the same precision for all floats in a column. This makes them line up nicely.
  Precision is determined by values seen during look-ahead processing. The max
  precision defaults to 9, this can be changed when smaller or larger values are
  desired. See the '--f|format-floats' and '--p|precision' options.

* Header lines: Headers are detected automatically when possible. This can be
  overridden when automatic detection doesn't work as desired. Headers can be
  underlined and repeated at regular intervals.

* Missing values: A substitute value can be used for empty fields. This is often
  less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'.

* Exponential notation: As part of float formatting, '--f|format-floats' re-formats
  columns where exponential notation is found so all the values in the column
  are displayed using exponential notation with the same precision.

* Preamble: A number of initial lines can be designated as a preamble and output
  unchanged. The preamble is before the header, if a header is present.

* Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double
  width. This is not always correct, but works well in most cases.

Options:
EOS";
91 
/** Short help text. Printed ahead of the option list when help is requested
 * through the standard getopt help option.
 */
auto helpText = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a more human readable format. This is done by lining
up data into fixed-width columns. Text is left aligned, numbers are right aligned.
Floating point numbers are aligned on the decimal point when feasible.

Options:
EOS";
101 
/** TsvPrettyOptions is used to process and store command line options.
 *
 * Fields marked "Derived" are not set directly from the command line. They are
 * calculated by processArgs from the other options.
 */
struct TsvPrettyOptions
{
    string programName;
    bool helpVerbose = false;           // --help-verbose
    bool hasHeader = false;             // --H|header (Note: Default false assumed by validation code)
    bool autoDetectHeader = true;       // Derived (Note: Default true assumed by validation code)
    bool noHeader = false;              // --x|no-header (Note: Default false assumed by validation code)
    size_t lookahead = 1000;            // --l|lookahead
    size_t repeatHeader = 0;            // --r|repeat-header num (zero means no repeat)
    bool underlineHeader = false;       // --u|underline-header
    bool formatFloats = false;          // --f|format-floats
    size_t floatPrecision = 9;          // --p|precision num (max precision when formatting floats.)
    bool replaceEmpty = false;          // --e|replace-empty
    string emptyReplacement = "";       // --E|empty-replacement
    size_t emptyReplacementPrintWidth = 0;    // Derived
    char delim = '\t';                  // --d|delimiter
    size_t spaceBetweenFields = 2;      // --s|space-between-fields num
    size_t maxFieldPrintWidth = 40;     // --m|max-text-width num; Max width for variable width text fields.
    size_t preambleLines = 0;           // --a|preamble; Number of preamble lines.
    bool versionWanted = false;         // --V|version

    /* Processes the command line arguments, filling in the option fields.
     *
     * Returns a tuple. First value is true if command line arguments were successfully
     * processed and execution should continue, or false if an error occurred or the user
     * asked for help or version information. If false, the second value is the
     * appropriate exit code (0 or 1).
     *
     * Returning true (execution continues) means args have been validated and the
     * derived values have been calculated:
     *  - autoDetectHeader is turned off when either --header or --no-header is given.
     *  - replaceEmpty is turned on when a non-empty --empty-replacement is given.
     *  - emptyReplacement becomes "--" when --replace-empty is given without a
     *    replacement string.
     *  - emptyReplacementPrintWidth is set from the replacement string, if any.
     *
     * Exceptions raised during option parsing or validation are caught here. The
     * message is written to stderr and tuple(false, 1) is returned.
     *
     * cmdArgs is passed by reference because getopt removes the options it
     * recognizes from the array.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options
            auto r = getopt(
                cmdArgs,
                "help-verbose",           "       Print full help.", &helpVerbose,
                std.getopt.config.caseSensitive,
                "H|header",               "       Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "x|no-header",            "       Assume no header. Turns off automatic header detection.", &noHeader,
                "l|lookahead",            "NUM    Lines to read to interpret data before generating output. Default: 1000", &lookahead,

                "r|repeat-header",        "NUM    Lines to print before repeating the header. Default: No repeating header", &repeatHeader,

                "u|underline-header",     "       Underline the header.", &underlineHeader,
                "f|format-floats",        "       Format floats for better readability. Default: No", &formatFloats,
                "p|precision",            "NUM    Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler,
                std.getopt.config.caseSensitive,
                "e|replace-empty",        "       Replace empty fields with '--'.", &replaceEmpty,
                "E|empty-replacement",    "STR    Replace empty fields with a string.", &emptyReplacement,
                std.getopt.config.caseInsensitive,
                "d|delimiter",            "CHR    Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,
                "s|space-between-fields", "NUM    Spaces between each field (Default: 2)", &spaceBetweenFields,
                "m|max-text-width",       "NUM    Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth,
                "a|preamble",             "NUM    Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines,
                std.getopt.config.caseSensitive,
                "V|version",              "       Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-pretty"));
                return tuple(false, 0);
            }

            /* Validation and derivations. */
            if (noHeader && hasHeader) throw new Exception("Cannot specify both --H|header and --x|no-header.");

            if (noHeader || hasHeader) autoDetectHeader = false;

            /* Zero look-ahead has limited utility unless the first line is known to
             * be a header. Good chance the user will get an unintended behavior.
             */
            if (lookahead == 0 && autoDetectHeader)
            {
                assert (!noHeader && !hasHeader);
                throw new Exception("Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'.");
            }

            /* A replacement string implies replacement is wanted. Replacement
             * without a string uses the default "--".
             */
            if (emptyReplacement.length != 0) replaceEmpty = true;
            else if (replaceEmpty) emptyReplacement = "--";

            if (emptyReplacement.length != 0)
            {
                emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth;
            }
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }

    /* Option handler for --p|precision. It also sets --f|format-floats.
     *
     * optionVal is converted to size_t. A conversion failure raises an exception,
     * which propagates out of the getopt call in processArgs and is reported there.
     */
    private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure
    {
        import std.conv : to;
        floatPrecision = optionVal.to!size_t;
        formatFloats = true;
    }
}
225 
/** tsvPretty is the main loop. It reads each input file line by line and hands
 * every line to a TsvPrettyProcessor instance.
 *
 * Physical I/O sources and sinks are kept here, separate from the processing
 * algorithm, which works on generic ranges. When no files are given, standard
 * input is read; the file name "-" also selects standard input.
 *
 * Line numbers restart at one for each file. Lines numbered up to
 * options.preambleLines go to processPreambleLine, the next line goes to
 * processFileFirstLine, and all later lines go to processLine. After all input
 * is consumed, finish is called once.
 *
 * A new lockingTextWriter on stdout is created for every call into the
 * processor. Per the original author's notes, this has the effect of flushing
 * standard output every line, which is desirable in command line tools.
 */
void tsvPretty(in ref TsvPrettyOptions options, string[] files)
{
    auto processor = TsvPrettyProcessor(options);
    auto headerLineNum = options.preambleLines + 1;
    auto inputNames = (files.length > 0) ? files : ["-"];

    foreach (inputName; inputNames)
    {
        auto source = (inputName == "-") ? stdin : inputName.File();

        size_t lineNum = 0;
        foreach (line; source.byLine)
        {
            lineNum++;     // One-based line number within the current file.

            if (lineNum < headerLineNum)
            {
                processor.processPreambleLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
            }
            else if (lineNum > headerLineNum)
            {
                processor.processLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
            }
            else
            {
                processor.processFileFirstLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line);
            }
        }
    }

    processor.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter));
}
259 
/** TsvPrettyProcessor maintains state of processing and exposes operations for
 * processing individual input lines.
 *
 * TsvPrettyProcessor knows that input is file-based, but doesn't deal with actual
 * files or reading lines from input. That is the job of the caller. Output is
 * written to an output range. The caller is expected to pass each line in the
 * order received; that assumption is built into the processing.
 *
 * In addition to the constructor, there are four API methods:
 *  - processPreambleLine - Called to process a preamble line occurring before
 *    the header line or first line of data.
 *  - processFileFirstLine - Called to process the first line of each file. This
 *    enables header processing.
 *  - processLine - Called to process all lines except for the first line of a file.
 *  - finish - Called at the end of all processing. This is needed in case the
 *    look-ahead cache is still being filled when input terminates.
 */

struct TsvPrettyProcessor
{
    import std.array : appender;

private:
    /* Outcome of automatic header detection. 'none' means no decision yet. */
    enum AutoDetectHeaderResult { none, hasHeader, noHeader };

    TsvPrettyOptions _options;
    size_t _fileCount = 0;               // Number of files whose first line has been seen
    size_t _dataLineOutputCount = 0;     // Number of data lines written so far
    bool _stillCaching = true;           // True until the look-ahead cache has been output
    string _candidateHeaderLine;         // First line of the first file, during auto-detection
    auto _lookaheadCache = appender!(string[])();
    FieldFormat[] _fieldVector;          // One entry per column seen so far
    AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none;

    /* The constructor and the API methods are public so the documented API is
     * usable from other modules, matching the layout used by FieldFormat.
     */
public:

    /** Constructor. Stores a copy of the options. Caching is turned off from the
     * start when there is no header and look-ahead is zero.
     */
    this(const TsvPrettyOptions options) @safe pure nothrow @nogc
    {
        _options = options;
        if (options.noHeader && options.lookahead == 0) _stillCaching = false;
    }

    invariant
    {
        /* One of the three header modes must be active. */
        assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader);

        /* The cache never holds a full look-ahead's worth of lines between calls.
         * It is flushed as soon as it reaches the look-ahead size.
         */
        assert((_options.lookahead == 0 && _lookaheadCache.data.length == 0) ||
               _lookaheadCache.data.length < _options.lookahead);
    }

    /** Called to process a preamble line occurring before the header line or first
     * line of data.
     *
     * The line is written unchanged, followed by a newline, but only while no
     * file's first line has been processed yet. Otherwise the line is dropped.
     */
    void processPreambleLine(OutputRange!char outputStream, const char[] line)
    {
        if (_fileCount == 0)
        {
            put(outputStream, line);
            put(outputStream, '\n');
        }
    }

    /** Called to process the first line of each file. This enables header processing.
     *
     * With --no-header the line is treated as data. With --header the line from the
     * first file becomes the header and the line from later files is skipped. With
     * auto-detection the first file's line is held as a candidate header until a
     * decision is made.
     */
    void processFileFirstLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        _fileCount++;

        if (_options.noHeader)
        {
            processLine(outputStream, line);
        }
        else if (_options.hasHeader)
        {
            if (_fileCount == 1)
            {
                setHeaderLine(line);
                if (_options.lookahead == 0) outputLookaheadCache(outputStream);
            }
        }
        else
        {
            assert(_options.autoDetectHeader);

            final switch (_autoDetectHeaderResult)
            {
            case AutoDetectHeaderResult.noHeader:
                /* Already decided there is no header: the line is data. */
                assert(_fileCount > 1);
                processLine(outputStream, line);
                break;

            case AutoDetectHeaderResult.hasHeader:
                /* Already decided there is a header: skip the repeated header. */
                assert(_fileCount > 1);
                break;

            case AutoDetectHeaderResult.none:
                if (_fileCount == 1)
                {
                    assert(_candidateHeaderLine.length == 0);
                    _candidateHeaderLine = line.to!string;
                }
                else if (_fileCount == 2)
                {
                    /* A second file starting with the same line as the first file
                     * is taken as a header. A different line means no header.
                     */
                    if (_candidateHeaderLine == line)
                    {
                        _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                        setHeaderLine(_candidateHeaderLine);

                        /* Edge case: First file has only a header line and look-ahead set to zero. */
                        if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream);
                    }
                    else
                    {
                        _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
                        updateFieldFormatsForLine(_candidateHeaderLine);
                        processLine(outputStream, line);
                    }
                }
                break;
            }
        }
    }

    /** Called to process all lines except for the first line of a file. The line is
     * added to the look-ahead cache while caching, otherwise it is written out.
     */
    void processLine(OutputRange!char outputStream, const char[] line)
    {
        if (_stillCaching) cacheDataLine(outputStream, line);
        else outputDataLine(outputStream, line);
    }

    /** Called at the end of all processing. This is needed in case the look-ahead cache
     * is still being filled when input terminates.
     */
    void finish(OutputRange!char outputStream)
    {
        if (_stillCaching) outputLookaheadCache(outputStream);
    }

private:
    /* outputLookaheadCache finalizes processing of the lookahead cache. This includes
     * setting the type and width of each field, finalizing the auto-detect header
     * decision, and outputting all lines in the cache.
     */
    void outputLookaheadCache(OutputRange!char outputStream)
    {
        assert(_stillCaching);

        /* Auto-detection still undecided and a candidate is held: decide now,
         * based on the data seen in the cache.
         */
        if (_options.autoDetectHeader &&
            _autoDetectHeaderResult == AutoDetectHeaderResult.none &&
            _candidateHeaderLine.length != 0)
        {
            if (candidateHeaderLooksLikeHeader())
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
                setHeaderLine(_candidateHeaderLine);
            }
            else
            {
                _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
            }
        }


        if (_options.hasHeader ||
            (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader))
        {
            finalizeFieldFormatting();
            outputHeader(outputStream);
        }
        else if (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader &&
                 _candidateHeaderLine.length != 0)
        {
            /* The candidate turned out to be data. Include it in the field
             * formats and write it as the first data line.
             */
            updateFieldFormatsForLine(_candidateHeaderLine);
            finalizeFieldFormatting();
            outputDataLine(outputStream, _candidateHeaderLine);
        }
        else
        {
            finalizeFieldFormatting();
        }

        foreach(line; _lookaheadCache.data) outputDataLine(outputStream, line);
        _lookaheadCache.clear;
        _stillCaching = false;
    }

    /* Returns true if the candidate header line appears to be a header, judged
     * against the field formats accumulated so far.
     */
    bool candidateHeaderLooksLikeHeader() @safe
    {
        import std.algorithm : splitter;

        /* The candidate header is declared as the header if the look-ahead cache has at least
         * one numeric field that is text in the candidate header.
         */
        foreach(fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate)
        {
            auto candidateFieldFormat = FieldFormat(fieldIndex);
            candidateFieldFormat.updateForFieldValue(fieldValue, _options);
            if (_fieldVector.length > fieldIndex &&
                candidateFieldFormat.fieldType == FieldType.text &&
                (_fieldVector[fieldIndex].fieldType == FieldType.integer ||
                 _fieldVector[fieldIndex].fieldType == FieldType.floatingPoint ||
                 _fieldVector[fieldIndex].fieldType == FieldType.exponent))
            {
                return true;
            }
        }

        return false;
    }

    /* Splits the line on the delimiter and records each piece as the header of
     * the corresponding field, adding field entries as needed.
     */
    void setHeaderLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach(fieldIndex, header; line.splitter(_options.delim).enumerate)
        {
            if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
            assert(_fieldVector.length > fieldIndex);
            _fieldVector[fieldIndex].setHeader(header);
        }
    }

    /* Adds a copy of the line to the look-ahead cache and updates the field
     * formats from it. The cache is output once it reaches the look-ahead size.
     */
    void cacheDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.conv : to;

        assert(_lookaheadCache.data.length < _options.lookahead);

        _lookaheadCache ~= line.to!string;
        updateFieldFormatsForLine(line);
        if (_lookaheadCache.data.length == _options.lookahead) outputLookaheadCache(outputStream);
    }

    /* Splits the line on the delimiter and feeds each value to the corresponding
     * field's format, adding field entries as needed.
     */
    void updateFieldFormatsForLine(const char[] line) @safe
    {
        import std.algorithm : splitter;

        foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
            assert(_fieldVector.length > fieldIndex);
            _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
        }

    }

    /* Finalizes formatting for every field in order. Each field is given its start
     * position; the next field starts at the value returned by the field's
     * finalizeFormatting plus the configured space between fields.
     */
    void finalizeFieldFormatting() @safe pure @nogc nothrow
    {
        size_t nextFieldStart = 0;
        foreach(ref field; _fieldVector)
        {
            nextFieldStart = field.finalizeFormatting(nextFieldStart, _options) + _options.spaceBetweenFields;
        }
    }

    /* Writes the header line, padding with spaces up to each field's start position.
     * When the underline option is set, a second line of underlines is written.
     */
    void outputHeader(OutputRange!char outputStream)
    {
        size_t nextOutputPosition = 0;
        foreach(fieldIndex, ref field; _fieldVector.enumerate)
        {
            size_t spacesNeeded = field.startPosition - nextOutputPosition;
            put(outputStream, repeat(" ", spacesNeeded));
            nextOutputPosition += spacesNeeded;
            nextOutputPosition += field.writeHeader(outputStream, _options);
        }
        put(outputStream, '\n');

        if (_options.underlineHeader)
        {
            nextOutputPosition = 0;
            foreach(fieldIndex, ref field; _fieldVector.enumerate)
            {
                size_t spacesNeeded = field.startPosition - nextOutputPosition;
                put(outputStream, repeat(" ", spacesNeeded));
                nextOutputPosition += spacesNeeded;
                nextOutputPosition += field.writeHeader!(Yes.writeUnderline)(outputStream, _options);
            }
            put(outputStream, '\n');
        }
    }

    /* Writes one data line, aligning each field value at its field's start position.
     * If header repetition is enabled and a header is in use, a blank line and the
     * header are written first whenever the count of data lines already written is
     * a non-zero multiple of the repeat interval.
     */
    void outputDataLine(OutputRange!char outputStream, const char[] line)
    {
        import std.algorithm : splitter;

        /* Repeating header option. */
        if (_options.repeatHeader != 0 && _dataLineOutputCount != 0 &&
            (_options.hasHeader || (_options.autoDetectHeader &&
                                    _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)) &&
            _dataLineOutputCount % _options.repeatHeader == 0)
        {
            put(outputStream, '\n');
            outputHeader(outputStream);
        }

        _dataLineOutputCount++;

        size_t nextOutputPosition = 0;
        foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
        {
            if (fieldIndex == _fieldVector.length)
            {
                /* Line is longer than any seen while caching. Add a new FieldFormat entry
                 * and set the line formatting based on this field value.
                 */
                _fieldVector ~= FieldFormat(fieldIndex);
                size_t startPosition = (fieldIndex == 0) ?
                    0 :
                    _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields;

                _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
                _fieldVector[fieldIndex].finalizeFormatting(startPosition, _options);
            }

            assert(fieldIndex < _fieldVector.length);

            FieldFormat fieldFormat = _fieldVector[fieldIndex];
            size_t nextFieldStart = fieldFormat.startPosition;
            size_t spacesNeeded = (nextOutputPosition < nextFieldStart) ?
                nextFieldStart - nextOutputPosition :
                (fieldIndex == 0) ? 0 : 1;  // Previous field went long. One space between fields

            put(outputStream, repeat(" ", spacesNeeded));
            nextOutputPosition += spacesNeeded;
            nextOutputPosition += fieldFormat.writeFieldValue(outputStream, nextOutputPosition, fieldValue, _options);
        }
        put(outputStream, '\n');
    }
}
590 
/** The kinds of field content tsv-pretty distinguishes. The first member,
 * unknown, is the default initial value for variables of this type.
 */
enum FieldType
{
    unknown,
    text,
    integer,
    floatingPoint,
    exponent,
}
593 
/** The alignments a field can be given by tsv-pretty processing. The first
 * member, left, is the default initial value for variables of this type.
 */
enum FieldAlignment
{
    left,
    right,
}
596 
597 /** FieldFormat holds all the formatting info needed to format data values in a specific
598  * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated
599  * during the caching phase. Each FieldFormat instance is part of a vector representing
600  * the full row, so each includes the start position on the line and similar data.
601  *
602  * APIs used during the caching phase to gather field value samples
603  *  - this - Initial construction. Takes the field index.
604  *  - setHeader - Used to set the header text.
605  *  - updateForFieldValue - Used to add the next field value sample.
606  *  - finalizeFormatting - Used at the end of caching to finalize the format choices.
607  *
608  * APIs used after caching is finished (after finalizeFormatting):
609  *  - startPosition - Returns the expected start position for the field.
610  *  - endPosition - Returns the expected end position for the field.
611  *  - writeHeader - Outputs the header, properly aligned.
612  *  - writeFieldValue - Outputs the current field value, properly aligned.
613  */
614 
615 struct FieldFormat
616 {
617 private:
618     size_t _fieldIndex;                  // Zero-based index in the line
619     string _header = "";                 // Original field header
620     size_t _headerPrintWidth = 0;
621     FieldType _type = FieldType.unknown;
622     FieldAlignment _alignment = FieldAlignment.left;
623     size_t _startPosition = 0;
624     size_t _printWidth = 0;
625     size_t _precision = 0;          // Number of digits after the decimal point
626 
627     /* These are used while doing initial type and print format detection. */
628     size_t _minRawPrintWidth = 0;
629     size_t _maxRawPrintWidth = 0;
630     size_t _maxDigitsBeforeDecimal = 0;
631     size_t _maxDigitsAfterDecimal = 0;
632     size_t _maxSignificantDigits = 0;  // Digits to include in exponential notation
633 
634 public:
635 
636     /** Initial construction. Takes a field index. */
637     this(size_t fieldIndex) @safe pure nothrow @nogc
638     {
639         _fieldIndex = fieldIndex;
640     }
641 
642     /** Sets the header text. */
643     void setHeader(const char[] header) @safe
644     {
645         import std.conv : to;
646 
647         _header = header.to!string;
648         _headerPrintWidth = _header.monospacePrintWidth;
649     }
650 
651     /** Returns the expected start position for the field. */
652     size_t startPosition() nothrow pure @safe @property
653     {
654         return _startPosition;
655     }
656 
657     /** Returns the expected end position for the field. */
658     size_t endPosition() nothrow pure @safe @property
659     {
660         return _startPosition + _printWidth;
661     }
662 
663     /** Returns the type of field. */
664     FieldType fieldType() nothrow pure @safe @property
665     {
666         return _type;
667     }
668 
669     /** Writes the field header or underline characters to the output stream.
670      *
671      * The current output position should have been written up to the field's start position,
672      * including any spaces between fields. Unlike data fields, there is no need to correct
673      * for previous fields that have run long. This routine does not output trailing spaces.
674      * This makes it simpler for lines to avoid unnecessary trailing spaces.
675      *
676      * Underlines can either be written the full width of the field or the just under the
677      * text of the header. At present this is a template parameter (compile-time).
678      *
679      * The print width of the output is returned.
680      */
681     size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline,
682                         Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline)
683         (OutputRange!char outputStream, in ref TsvPrettyOptions options)
684     {
685         import std.range : repeat;
686 
687         size_t positionsWritten = 0;
688         if (_headerPrintWidth > 0)
689         {
690             static if (writeUnderline)
691             {
692                 static if (fullWidthUnderline)
693                 {
694                     put(outputStream, repeat("-", _printWidth));
695                     positionsWritten += _printWidth;
696                 }
697                 else  // Underline beneath the header text only
698                 {
699                     if (_alignment == FieldAlignment.right)
700                     {
701                         put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
702                         positionsWritten += _printWidth - _headerPrintWidth;
703                     }
704                     put(outputStream, repeat("-", _headerPrintWidth));
705                     positionsWritten += _headerPrintWidth;
706                 }
707             }
708             else
709             {
710                 if (_alignment == FieldAlignment.right)
711                 {
712                     put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
713                     positionsWritten += _printWidth - _headerPrintWidth;
714                 }
715                 put(outputStream, _header);
716                 positionsWritten += _headerPrintWidth;
717             }
718         }
719         return positionsWritten;
720     }
721 
/** Writes the field value for the current column.
 *
 * The caller needs to generate output at least to the column's start position, but
 * can go beyond if previous fields have run long.
 *
 * The field value is aligned properly in the field. Either left aligned (text) or
 * right aligned (numeric). Floating point fields are both right aligned and
 * decimal point aligned. Trailing spaces are not added, the caller must add any
 * necessary trailing spaces prior to printing the next field.
 *
 * Params:
 *   outputStream = Output range receiving the leading spaces and the value.
 *   currPosition = Current print position on the line; must be at or beyond the
 *                  column's start position.
 *   fieldValue   = Raw field text.
 *   options      = Program options (formatFloats, replaceEmpty, emptyReplacement).
 *
 * Returns: The number of print positions written, i.e. the leading spaces plus the
 * monospace print width of the printed value.
 */
size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition,
                       const char[] fieldValue, in ref TsvPrettyOptions options)
in
{
    assert(currPosition >= _startPosition);   // Caller responsible for advancing to field start position.
    assert(_type == FieldType.text || _type == FieldType.integer ||
           _type == FieldType.floatingPoint || _type == FieldType.exponent);
}
body
{
    import std.algorithm : canFind, find, max;
    import std.conv : to;
    import std.string : isNumeric;

    /* Create the print version of the string. Either the raw value or a formatted
     * version of a float.
     */
    string printValue;
    if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer)
    {
        printValue = fieldValue.to!string;
    }
    else if (_type == FieldType.exponent)
    {
        printValue = fieldValue.formatExponentValue(_precision);
    }
    else
    {
        assert(_type == FieldType.floatingPoint);
        printValue = fieldValue.formatFloatingPointValue(_precision);
    }

    if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement;
    immutable size_t printValuePrintWidth = printValue.monospacePrintWidth;

    /* Calculate leading spaces needed for right alignment. */
    size_t leadingSpaces = 0;
    if (_alignment == FieldAlignment.right)
    {
        /* Target width adjusts the column width to account for overrun by the previous field. */
        size_t targetWidth = _printWidth;
        if (currPosition != _startPosition)
        {
            immutable size_t startGap = currPosition - _startPosition;
            immutable size_t remainingWidth = (startGap < _printWidth) ? _printWidth - startGap : 0;
            targetWidth = max(printValuePrintWidth, remainingWidth);
        }

        if (printValuePrintWidth < targetWidth) leadingSpaces = targetWidth - printValuePrintWidth;

        /* The above calculation assumes the print value is fully right aligned.
         * This is not correct when raw value floats are being used rather than
         * formatted floats, as different values will have different precision.
         * The next adjustment accounts for this, dropping leading spaces as
         * needed to align the decimal point. Note that text and exponential
         * values get aligned strictly against right boundaries.
         */
        if (leadingSpaces > 0 && _precision > 0 &&
            _type == FieldType.floatingPoint && !options.formatFloats)
        {
            if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E'))
            {
                /* Length of the decimal point plus the digits following it; zero
                 * when the value has no decimal point.
                 */
                immutable size_t decimalAndDigitsLength = printValue.find(".").length;
                immutable size_t trailingSpaces =
                    (decimalAndDigitsLength == 0) ? _precision + 1 :
                    (decimalAndDigitsLength > _precision) ? 0 :
                    _precision + 1 - decimalAndDigitsLength;

                leadingSpaces = (leadingSpaces > trailingSpaces) ?
                    leadingSpaces - trailingSpaces : 0;
            }
        }
    }

    put(outputStream, repeat(' ', leadingSpaces));
    put(outputStream, printValue);
    return printValuePrintWidth + leadingSpaces;
}
823 
/** Updates type and format given a new field value.
 *
 * This is called during look-ahead caching to register a new sample value for the
 * column. The key components updated are field type and print width.
 *
 * Type precedence, once a value has been seen: text is final; exponent supersedes
 * floating point and integer; floating point supersedes integer. Empty values
 * never change a column's type.
 *
 * Params:
 *   fieldValue = Raw field text of the sample.
 *   options    = Program options (replaceEmpty, emptyReplacementPrintWidth).
 */
void updateForFieldValue(const char[] fieldValue, in ref TsvPrettyOptions options) @safe
{
    import std.algorithm : findAmong, findSplit, max, min;
    import std.conv : to, ConvException;
    import std.string : isNumeric;

    immutable size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth;
    immutable size_t fieldValuePrintWidthWithEmpty =
        (fieldValuePrintWidth == 0 && options.replaceEmpty) ?
        options.emptyReplacementPrintWidth :
        fieldValuePrintWidth;

    _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty);
    _minRawPrintWidth = (_minRawPrintWidth == 0) ?
        fieldValuePrintWidthWithEmpty :
        min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty);

    if (_type == FieldType.text)
    {
        /* Already text, can't become anything else. */
    }
    else if (fieldValuePrintWidth == 0)
    {
        /* Don't let an empty field override a numeric field type. */
    }
    else if (!fieldValue.isNumeric)
    {
        /* Not parsable as a number. Switch from unknown or numeric type to text. */
        _type = FieldType.text;
    }
    else
    {
        /* Field type is currently unknown or numeric, and current field parses as numeric.
         * See if it parses as integer or float. Integers will parse as floats, so try
         * integer types first. Only success or failure of each conversion matters,
         * the converted values are discarded.
         */
        FieldType parsesAs = FieldType.unknown;
        try
        {
            cast(void) fieldValue.to!long;
            parsesAs = FieldType.integer;
        }
        catch (ConvException)
        {
            try
            {
                cast(void) fieldValue.to!ulong;
                parsesAs = FieldType.integer;
            }
            catch (ConvException)
            {
                try
                {
                    cast(void) fieldValue.to!double;
                    parsesAs = (fieldValue.findAmong("eE").length == 0) ?
                        FieldType.floatingPoint : FieldType.exponent;
                }
                catch (ConvException)
                {
                    /* Note: This means isNumeric thinks it's a number, but conversions all failed. */
                    parsesAs = FieldType.text;
                }
            }
        }

        if (parsesAs == FieldType.text)
        {
            /* Not parsable as a number (despite isNumeric result). Switch to text type. */
            _type = FieldType.text;
        }
        else if (parsesAs == FieldType.exponent)
        {
            /* Exponential notation supersedes both vanilla floats and integers. */
            _type = FieldType.exponent;
            _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

            if (auto decimalSplit = fieldValue.findSplit("."))
            {
                auto fromExponent = decimalSplit[2].findAmong("eE");
                immutable size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length;
                _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal);
            }
            else
            {
                /* Exponent without a decimal point. */
                auto fromExponent = fieldValue.findAmong("eE");
                assert(fromExponent.length > 0);
                immutable size_t numDigits = fieldValue.length - fromExponent.length;
                _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits);
            }
        }
        else if (parsesAs == FieldType.floatingPoint)
        {
            /* Floating point supersedes integer but not exponential. */
            if (_type != FieldType.exponent) _type = FieldType.floatingPoint;
            _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);

            if (auto decimalSplit = fieldValue.findSplit("."))
            {
                _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
                _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length);
            }
        }
        else
        {
            assert(parsesAs == FieldType.integer);

            /* An integer must not demote a column already identified as floating
             * point or exponent. Without the exponent check the detected type
             * would depend on the order in which sample values are seen.
             */
            if (_type != FieldType.floatingPoint && _type != FieldType.exponent)
            {
                _type = FieldType.integer;
            }
            _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
            _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length);
        }
    }
}
946 
/** Updates field formatting info based on the current state. It is expected to be
 * called after adding field entries via updateForFieldValue().
 *
 * Sets the column's start position, resolves an unknown type to text, chooses
 * the alignment (right for integer, floating point and exponent columns, left
 * otherwise), and computes the print width and precision for the column type.
 *
 * Params:
 *   startPosition = Print position where this column starts.
 *   options       = Program options (floatPrecision, formatFloats, maxFieldPrintWidth).
 *
 * Returns: The column's end position, i.e. start position plus print width.
 */
size_t finalizeFormatting (size_t startPosition, in ref TsvPrettyOptions options) @safe pure @nogc nothrow
{
    import std.algorithm : max, min;

    _startPosition = startPosition;
    if (_type == FieldType.unknown) _type = FieldType.text;

    immutable bool isNumericColumn =
        _type == FieldType.integer ||
        _type == FieldType.floatingPoint ||
        _type == FieldType.exponent;
    _alignment = isNumericColumn ? FieldAlignment.right : FieldAlignment.left;

    if (_type == FieldType.floatingPoint)
    {
        size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal);
        size_t maxValueWidth = _maxDigitsBeforeDecimal + precision;
        if (precision > 0) maxValueWidth++;  // Account for the decimal point.
        _printWidth = max(1, _headerPrintWidth, maxValueWidth);
        _precision = precision;
    }
    else if (_type == FieldType.exponent)
    {
        /* One significant digit is printed before the decimal point, the rest after it. */
        size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0;
        _precision = min(options.floatPrecision, maxPrecision);

        /* Raw values keep their raw width; formatted values use precision plus 7. */
        size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7;
        _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth);
    }
    else if (_type == FieldType.integer)
    {
        _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth);
        _precision = 0;
    }
    else
    {
        /* Remaining (text) columns: the widest raw value is capped by the
         * maximum field print width option.
         */
        _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth,
                          min(options.maxFieldPrintWidth, _maxRawPrintWidth));
        _precision = 0;
    }

    return _startPosition + _printWidth;
}
991 }
992 
/** formatFloatingPointValue returns the printed representation of a raw value
 * formatted as a fixed precision floating number. This includes zero padding or
 * truncation of trailing digits as necessary to meet the desired precision.
 *
 * If the value cannot be interpreted as a double then the raw value is returned.
 * Similarly, values in exponential notation and non-finite values (NaN, infinity)
 * are returned without reformatting.
 *
 * This routine is used to format values in columns identified as floating point.
 *
 * Params:
 *   value     = Raw field text.
 *   precision = Number of digits to print after the decimal point.
 *
 * Returns: The formatted value, or a copy of the raw value when it is not reformatted.
 */
string formatFloatingPointValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    /* Exponential notation. Use the raw value. */
    if (value.canFind!(x => x == 'e' || x == 'E')) return value.to!string;

    try
    {
        immutable double doubleValue = value.to!double;
        if (!doubleValue.isFinite) return value.to!string;  // NaN or Infinity

        immutable size_t numPrecisionDigits = value.precisionDigits;

        /* The raw value has at least the requested digits: let format round it. */
        if (numPrecisionDigits >= precision) return format("%.*f", precision, doubleValue);

        /* The raw value has fewer digits than requested. Format at the raw value's
         * own precision and pad with zeros, adding the decimal point when the raw
         * value has no digits after it.
         */
        string unpadded = format("%.*f", numPrecisionDigits, doubleValue);
        if (numPrecisionDigits == 0) unpadded = unpadded ~ ".";
        return unpadded ~ repeat("0", precision - numPrecisionDigits).join;
    }
    catch (ConvException)
    {
        return value.to!string;
    }
}
1045 
/* formatFloatingPointValue tests. */
@safe unittest
{
    /* Values that are not finite numbers are returned as-is. */
    assert("".formatFloatingPointValue(3) == "");
    assert(" ".formatFloatingPointValue(3) == " ");
    assert("abc".formatFloatingPointValue(3) == "abc");
    assert("nan".formatFloatingPointValue(3) == "nan");

    /* Padding, truncation and rounding to the requested precision. */
    assert("0".formatFloatingPointValue(0) == "0");
    assert("1".formatFloatingPointValue(0) == "1");
    assert("1.".formatFloatingPointValue(0) == "1");
    assert("1".formatFloatingPointValue(3) == "1.000");
    assert("1000".formatFloatingPointValue(3) == "1000.000");
    assert("1000.001".formatFloatingPointValue(5) == "1000.00100");
    assert("1000.001".formatFloatingPointValue(3) == "1000.001");
    assert("1000.001".formatFloatingPointValue(2) == "1000.00");
    assert("1000.006".formatFloatingPointValue(2) == "1000.01");
    assert("-0.1".formatFloatingPointValue(1) == "-0.1");
    assert("-0.1".formatFloatingPointValue(3) == "-0.100");
    assert("-0.001".formatFloatingPointValue(3) == "-0.001");
    assert("-0.006".formatFloatingPointValue(2) == "-0.01");
    assert("-0.001".formatFloatingPointValue(1) == "-0.0");
    assert("-0.001".formatFloatingPointValue(0) == "-0");

    /* Exponential notation is returned without reformatting. */
    assert("0e+00".formatFloatingPointValue(0) == "0e+00");
    assert("0.00e+00".formatFloatingPointValue(0) == "0.00e+00");
    assert("1e+06".formatFloatingPointValue(1) == "1e+06");
    assert("1e+06".formatFloatingPointValue(2) == "1e+06");
    assert("1E-06".formatFloatingPointValue(1) == "1E-06");
    assert("1.1E+6".formatFloatingPointValue(2) == "1.1E+6");
    assert("1.1E+100".formatFloatingPointValue(2) == "1.1E+100");
}
1075 
/** formatExponentValue returns the printed representation of a raw value formatted
 * using exponential notation and a specific precision. If the value cannot be
 * interpreted as a double, or is not finite (NaN, infinity), then a copy of the
 * original value is returned.
 *
 * This routine is used to format values in columns identified as having exponent format.
 *
 * Params:
 *   value     = Raw field text.
 *   precision = Number of digits to print after the decimal point of the mantissa.
 *
 * Returns: The formatted value, or a copy of the raw value when it is not reformatted.
 */
string formatExponentValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind, findSplit;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    try
    {
        immutable double doubleValue = value.to!double;
        if (!doubleValue.isFinite) return value.to!string;  // NaN or Infinity

        /* In exponential form one significant digit precedes the decimal point. */
        immutable size_t numSignificantDigits = value.significantDigits;
        immutable size_t numPrecisionDigits = (numSignificantDigits == 0) ? 0 : numSignificantDigits - 1;

        /* The raw value has at least the requested digits: let format round it. */
        if (numPrecisionDigits >= precision) return format("%.*e", precision, doubleValue);

        /* The raw value has fewer digits than requested. Format at the raw value's
         * own precision, then zero pad the mantissa ahead of the exponent.
         */
        string unpaddedPrintValue = format("%.*e", numPrecisionDigits, doubleValue);
        auto exponentSplit = unpaddedPrintValue.findSplit("e");   // Uses the same exponent case as format call.

        string mantissa = exponentSplit[0];
        if (numPrecisionDigits == 0)
        {
            assert(precision != 0);
            assert(!mantissa.canFind("."));
            mantissa = mantissa ~ ".";
        }

        return mantissa ~ repeat("0", precision - numPrecisionDigits).join ~ exponentSplit[1] ~ exponentSplit[2];
    }
    catch (ConvException)
    {
        return value.to!string;
    }
}
1125 
/* formatExponentValue tests. */
@safe unittest
{
    /* Values that are not finite numbers are returned as-is. */
    assert("".formatExponentValue(3) == "");
    assert(" ".formatExponentValue(3) == " ");
    assert("abc".formatExponentValue(3) == "abc");
    assert("nan".formatExponentValue(3) == "nan");

    /* Plain integers and floats are converted to exponential notation. */
    assert("0".formatExponentValue(0) == "0e+00");
    assert("1".formatExponentValue(0) == "1e+00");
    assert("1.".formatExponentValue(0) == "1e+00");
    assert("1".formatExponentValue(3) == "1.000e+00");
    assert("1000".formatExponentValue(3) == "1.000e+03");
    assert("1000.001".formatExponentValue(5) == "1.00000e+03");
    assert("1000.001".formatExponentValue(3) == "1.000e+03");
    assert("1000.001".formatExponentValue(6) == "1.000001e+03");
    assert("1000.006".formatExponentValue(5) == "1.00001e+03");
    assert("-0.1".formatExponentValue(1) == "-1.0e-01");
    assert("-0.1".formatExponentValue(3) == "-1.000e-01");
    assert("-0.001".formatExponentValue(3) == "-1.000e-03");
    assert("-0.001".formatExponentValue(1) == "-1.0e-03");
    assert("-0.001".formatExponentValue(0) == "-1e-03");

    /* Values already in exponential notation are reformatted to the precision. */
    assert("0e+00".formatExponentValue(0) == "0e+00");
    assert("0.00e+00".formatExponentValue(0) == "0e+00");
    assert("1e+06".formatExponentValue(1) == "1.0e+06");
    assert("1e+06".formatExponentValue(2) == "1.00e+06");
    assert("1.0001e+06".formatExponentValue(1) == "1.0e+06");
    assert("1.0001e+06".formatExponentValue(5) == "1.00010e+06");
}
1153 
/** Returns the number of significant digits in a numeric string.
 *
 * Significant digits are those needed to represent a number in exponential notation.
 * Examples:
 *   22.345 - 5 digits
 *   10.010 - 4 digits
 *   0.0032 - 2 digits
 *
 * Only the mantissa is examined, digits in an exponent are never counted. A finite
 * value always has at least one significant digit (e.g. "0", "0.0", "0e+10").
 * Zero is returned for values that are not finite (NaN, infinity).
 *
 * Params:
 *   numericString = Text for which std.string.isNumeric is true.
 */
size_t significantDigits(const char[] numericString) @safe pure
{
    import std.algorithm : canFind, find, findAmong, stripRight;
    import std.ascii : isDigit;
    import std.math : isFinite;
    import std.string : isNumeric;
    import std.conv : to;

    assert (numericString.isNumeric);

    size_t significantDigits = 0;
    if (numericString.to!double.isFinite)
    {
        /* Drop the exponent before looking for digits. Searching the full string
         * would pick up exponent digits when the mantissa is all zeros, e.g.
         * "0e+10" would be counted as having two significant digits.
         */
        auto exponentPart = numericString.findAmong("eE");
        auto mantissa = numericString[0 .. $ - exponentPart.length];

        /* Skip the sign, leading zeros and a leading decimal point. */
        auto digitsPart = mantissa.find!(x => x.isDigit && x != '0');

        if (digitsPart.canFind('.'))
        {
            /* Trailing zeros after the decimal point are not significant. The
             * decimal point itself is excluded from the count.
             */
            digitsPart = digitsPart.stripRight('0');
            significantDigits = digitsPart.length - 1;
        }
        else
        {
            significantDigits = digitsPart.length;
        }

        /* Zero values have no non-zero digits, but still print one digit. */
        if (significantDigits == 0) significantDigits = 1;
    }

    return significantDigits;
}
1194 
/* significantDigits tests. */
@safe pure unittest
{
    /* Integers and fixed point values. */
    assert("0".significantDigits == 1);
    assert("10".significantDigits == 2);
    assert("0.0".significantDigits == 1);
    assert("-10.0".significantDigits == 2);
    assert("-.01".significantDigits == 1);
    assert("-.5401".significantDigits == 4);
    assert("1010.010".significantDigits == 6);
    assert("0.0003003".significantDigits == 4);

    /* Exponential notation. */
    assert("6e+06".significantDigits == 1);
    assert("6.0e+06".significantDigits == 1);
    assert("6.5e+06".significantDigits == 2);
    assert("6.005e+06".significantDigits == 4);
}
1210 
/** Returns the number of digits to the right of the decimal point in a numeric string.
 * This routine includes trailing zeros in the count. Digits belonging to an exponent
 * are not counted.
 *
 * Zero is returned when there is no decimal point and for values that are not
 * finite (NaN, infinity).
 *
 * Params:
 *   numericString = Text for which std.string.isNumeric is true.
 */
size_t precisionDigits(const char[] numericString) @safe pure
{
    import std.algorithm : findAmong, findSplit;
    import std.math : isFinite;
    import std.string : isNumeric;
    import std.conv : to;

    assert (numericString.isNumeric);

    size_t precisionDigits = 0;
    if (numericString.to!double.isFinite)
    {
        if (auto decimalSplit = numericString.findSplit("."))
        {
            /* Everything after the decimal point, less any exponent suffix. */
            auto afterDecimal = decimalSplit[2];
            auto exponentPart = afterDecimal.findAmong("eE");
            precisionDigits = afterDecimal.length - exponentPart.length;
        }
    }

    return precisionDigits;
}
1236 
/* precisionDigits tests. */
@safe pure unittest
{
    /* No decimal point. */
    assert("0".precisionDigits == 0);
    assert("10".precisionDigits == 0);

    /* Digits after the decimal point, trailing zeros included. */
    assert("0.0".precisionDigits == 1);
    assert("-10.0".precisionDigits == 1);
    assert("-.01".precisionDigits == 2);
    assert("-.5401".precisionDigits == 4);
}
1246 
/** Calculates the expected print width of a string in monospace (fixed-width) fonts.
 *
 * Each grapheme counts as one column, or two columns when its first code point
 * falls in one of the East Asian wide or fullwidth ranges listed in isWide. If
 * the string is not valid UTF-8 the length in bytes is returned instead.
 */
size_t monospacePrintWidth(const char[] str) @safe nothrow
{
    import std.uni : byGrapheme;

    /* True for code points that occupy two columns. This is a superset of the
     * U+3000 - U+9FFF range, extended to cover Hangul, compatibility and
     * fullwidth forms, and the supplementary ideographic planes.
     */
    static bool isWide(dchar c) @safe pure nothrow @nogc
    {
        return
            (c >= 0x1100 && c <= 0x115F) ||    // Hangul Jamo initial consonants
            (c >= 0x2E80 && c <= 0xA4CF) ||    // CJK radicals through Yi (includes U+3000 - U+9FFF)
            (c >= 0xAC00 && c <= 0xD7A3) ||    // Hangul syllables
            (c >= 0xF900 && c <= 0xFAFF) ||    // CJK compatibility ideographs
            (c >= 0xFE30 && c <= 0xFE6F) ||    // CJK compatibility forms
            (c >= 0xFF01 && c <= 0xFF60) ||    // Fullwidth forms
            (c >= 0xFFE0 && c <= 0xFFE6) ||    // Fullwidth signs
            (c >= 0x20000 && c <= 0x3FFFD);    // Supplementary and tertiary ideographic planes
    }

    size_t width = 0;
    try
    {
        foreach (g; str.byGrapheme) width += isWide(g[0]) ? 2 : 1;
    }
    catch (Exception)
    {
        /* Invalid utf-8 sequence. Catch avoids program failure. */
        width = str.length;
    }

    return width;
}
1264 
/* monospacePrintWidth tests. */
unittest
{
    /* Empty and single-width text. */
    assert("".monospacePrintWidth == 0);
    assert(" ".monospacePrintWidth == 1);
    assert("abc".monospacePrintWidth == 3);

    /* Double-width ideographs. */
    assert("林檎".monospacePrintWidth == 4);

    /* Multi-byte characters that print single width. */
    assert("æble".monospacePrintWidth == 4);
    assert("ვაშლი".monospacePrintWidth == 5);
    assert("größten".monospacePrintWidth == 7);
}