1 /**
2 Command line tool that prints TSV data aligned for easier reading on consoles
3 and traditional command-line environments.
4 
5 Copyright (c) 2017-2018, eBay Software Foundation
6 Initially written by Jon Degenhardt
7 
8 License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
9 */
10 module tsv_pretty;
11 
12 import std.range;
13 import std.stdio;
14 import std.typecons : Flag, Yes, No, tuple;
15 
version(unittest)
{
    // Unit test builds take their entry point from the compiler's -main switch.
}
else
{
    /** Program entry point.
     *
     * Runs command line argument processing first. If that step says not to
     * continue, the exit code it produced is returned. Otherwise tsvPretty is
     * run on the remaining arguments (everything after the program name). Any
     * Exception raised by tsvPretty is reported on stderr and turned into exit
     * code 1.
     */
    int main(string[] cmdArgs)
    {
        /* DMD code coverage builds: turn on merging of coverage reports. */
        version(D_Coverage) version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }

        TsvPrettyOptions options;

        auto argsResult = options.processArgs(cmdArgs);
        immutable bool shouldContinue = argsResult[0];
        immutable int argsExitCode = argsResult[1];
        if (!shouldContinue) return argsExitCode;

        try
        {
            tsvPretty(options, cmdArgs[1 .. $]);
        }
        catch (Exception exc)
        {
            stderr.writefln("Error [%s]: %s", options.programName, exc.msg);
            return 1;
        }

        return 0;
    }
}
46 
/** Long-form help text. processArgs passes it to defaultGetoptPrinter when
 * --help-verbose is given; the generated option list is printed after it.
 */
auto helpTextVerbose = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a format intended to be more human readable when
working on the command line. This is done primarily by lining up data into
fixed-width columns. Text is left aligned, numbers are right aligned. Floating
point numbers are aligned on the decimal point when feasible.

Processing begins by reading the initial set of lines into memory to determine
the field widths and data types of each column. This look-ahead buffer is used
for header detection as well. Output begins after this processing is complete.

By default, only the alignment is changed, the actual values are not modified.
Several of the formatting options do modify the values.

Features:

* Floating point numbers: Floats can be printed in fixed-width precision, using
  the same precision for all floats in a column. This makes them line up nicely.
  Precision is determined by values seen during look-ahead processing. The max
  precision defaults to 9, this can be changed when smaller or larger values are
  desired. See the '--f|format-floats' and '--p|precision' options.

* Header lines: Headers are detected automatically when possible. This can be
  overridden when automatic detection doesn't work as desired. Headers can be
  underlined and repeated at regular intervals.

* Missing values: A substitute value can be used for empty fields. This is often
  less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'.

* Exponential notation: As part of float formatting, '--f|format-floats'
  re-formats columns where exponential notation is found so all the values in
  the column are displayed using exponential notation with the same precision.

* Preamble: A number of initial lines can be designated as a preamble and output
  unchanged. The preamble is before the header, if a header is present.

* Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double
  width. This is not always correct, but works well in most cases.

Options:
EOS";
89 
/** Short help text. processArgs passes it to defaultGetoptPrinter when help is
 * requested; the generated option list is printed after it.
 */
auto helpText = q"EOS
Synopsis: tsv-pretty [options] [file...]

tsv-pretty outputs TSV data in a more human readable format. This is done by lining
up data into fixed-width columns. Text is left aligned, numbers are right aligned.
Floating point numbers are aligned on the decimal point when feasible.

Options:
EOS";
99 
/** TsvPrettyOptions is used to process and store command line options.
 *
 * Fields marked "Derived" are not set directly from a command line option; they are
 * computed by processArgs after the options have been parsed.
 */
struct TsvPrettyOptions
{
    string programName;
    bool helpVerbose = false;           // --help-verbose
    bool hasHeader = false;             // --H|header (Note: Default false assumed by validation code)
    bool autoDetectHeader = true;       // Derived (Note: Default true assumed by validation code)
    bool noHeader = false;              // --x|no-header (Note: Default false assumed by validation code)
    size_t lookahead = 1000;            // --l|lookahead
    size_t repeatHeader = 0;            // --r|repeat-header num (zero means no repeat)
    bool underlineHeader = false;       // --u|underline-header
    bool formatFloats = false;          // --f|format-floats
    size_t floatPrecision = 9;          // --p|precision num (max precision when formatting floats.)
    bool replaceEmpty = false;          // --e|replace-empty
    string emptyReplacement = "";       // --E|empty-replacement
    size_t emptyReplacementPrintWidth = 0;    // Derived
    char delim = '\t';                  // --d|delimiter
    size_t spaceBetweenFields = 2;      // --s|space-between-fields num
    size_t maxFieldPrintWidth = 40;     // --m|max-text-width num; Max width for variable width text fields.
    size_t preambleLines = 0;           // --a|preamble; Number of preamble lines.
    bool versionWanted = false;         // --V|version

    /* Parses and validates the command line arguments.
     *
     * Returns a tuple. First value is true if command line arguments were successfully
     * processed and execution should continue, or false if an error occurred or the user
     * asked for help or version information. If false, the second value is the
     * appropriate exit code (0 or 1).
     *
     * Returning true (execution continues) means args have been validated and the
     * derived values calculated:
     *  - autoDetectHeader is turned off when either --H|header or --x|no-header is given.
     *  - replaceEmpty is turned on when a non-empty --E|empty-replacement is given, and
     *    emptyReplacement defaults to "--" when only --e|replace-empty is given.
     *  - emptyReplacementPrintWidth is set from a non-empty emptyReplacement.
     *
     * cmdArgs is passed to getopt by reference; the caller treats the entries left
     * after the program name as input file names.
     *
     * Errors are reported on stderr rather than thrown to the caller.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options
            auto r = getopt(
                cmdArgs,
                "help-verbose",           "       Print full help.", &helpVerbose,
                std.getopt.config.caseSensitive,
                "H|header",               "       Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "x|no-header",            "       Assume no header. Turns off automatic header detection.", &noHeader,
                "l|lookahead",            "NUM    Lines to read to interpret data before generating output. Default: 1000", &lookahead,

                "r|repeat-header",        "NUM    Lines to print before repeating the header. Default: No repeating header", &repeatHeader,

                "u|underline-header",     "       Underline the header.", &underlineHeader,
                "f|format-floats",        "       Format floats for better readability. Default: No", &formatFloats,
                "p|precision",            "NUM    Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler,
                std.getopt.config.caseSensitive,
                "e|replace-empty",        "       Replace empty fields with '--'.", &replaceEmpty,
                "E|empty-replacement",    "STR    Replace empty fields with a string.", &emptyReplacement,
                std.getopt.config.caseInsensitive,
                "d|delimiter",            "CHR    Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,
                "s|space-between-fields", "NUM    Spaces between each field (Default: 2)", &spaceBetweenFields,
                "m|max-text-width",       "NUM    Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth,
                "a|preamble",             "NUM    Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines,
                std.getopt.config.caseSensitive,
                "V|version",              "       Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            /* Help and version requests print their text and stop with exit code 0. */
            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-pretty"));
                return tuple(false, 0);
            }

            /* Validation and derivations. */
            if (noHeader && hasHeader) throw new Exception("Cannot specify both --H|header and --x|no-header.");

            if (noHeader || hasHeader) autoDetectHeader = false;

            /* Zero look-ahead has limited utility unless the first line is known to
             * be a header. Good chance the user will get an unintended behavior.
             */
            if (lookahead == 0 && autoDetectHeader)
            {
                assert (!noHeader && !hasHeader);
                throw new Exception("Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'.");
            }

            /* An explicit replacement string implies --e|replace-empty; a bare
             * --e|replace-empty uses the default replacement string.
             */
            if (emptyReplacement.length != 0) replaceEmpty = true;
            else if (replaceEmpty) emptyReplacement = "--";

            if (emptyReplacement.length != 0)
            {
                emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth;
            }
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }

    /* Option handler for --p|precision. It also sets --f|format-floats.
     *
     * The option value is converted to size_t; a value that cannot be converted makes
     * the conversion throw, which processArgs reports as a command line error.
     */
    private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure
    {
        import std.conv : to;
        floatPrecision = optionVal.to!size_t;
        formatFloats = true;
    }
}
223 
/** tsvPretty is the main loop. It reads every input file line by line and hands
 * each line to a TsvPrettyProcessor instance.
 *
 * Physical I/O sources and sinks are dealt with here, keeping them separate from
 * the processing algorithm, which works on lines and an output range. A new
 * lockingTextWriter on stdout is obtained for every input line.
 * NOTE(review): the original comment states this has the effect of flushing
 * standard output on every line - confirm.
 *
 * Params:
 *   options = Processed command line options.
 *   files   = Input file names. An empty list, or the name "-", selects standard input.
 */
void tsvPretty(in ref TsvPrettyOptions options, string[] files)
{
    auto headerLineNum = options.preambleLines + 1;
    auto processor = TsvPrettyProcessor(options);

    foreach (filename; (files.length > 0) ? files : ["-"])
    {
        auto inputStream = (filename == "-") ? stdin : filename.File();

        /* Line numbers are one-based and restart for every file. */
        foreach (lineNum, line; inputStream.byLine.enumerate(1))
        {
            auto lineSink = outputRangeObject!(char, char[])(stdout.lockingTextWriter);

            if (lineNum < headerLineNum)
            {
                processor.processPreambleLine(lineSink, line);
            }
            else if (lineNum == headerLineNum)
            {
                processor.processFileFirstLine(lineSink, line);
            }
            else
            {
                processor.processLine(lineSink, line);
            }
        }
    }

    /* Give the processor the chance to emit anything it is still holding. */
    processor.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter));
}
257 
258 /** TsvPrettyProcessor maintains state of processing and exposes operations for
259  * processing individual input lines.
260  *
261  * TsvPrettyProcessor knows that input is file-based, but doesn't deal with actual
262  * files or reading lines from input. That is the job of the caller. Output is
 * written to an output range. The caller is expected to pass each line in the
 * order received; that is an assumption built into its processing.
265  *
266  * In addition to the constructor, there are four API methods:
267  *  - processPreambleLine - Called to process a preamble line occurring before
268  *    the header line or first line of data.
269  *  - processFileFirstLine - Called to process the first line of each file. This
270  *    enables header processing.
 *  - processLine - Called to process all lines except for the first line of a file.
272  *  - finish - Called at the end of all processing. This is needed in case the
273  *    look-ahead cache is still being filled when input terminates.
274  */
275 
276 struct TsvPrettyProcessor
277 {
278     import std.array : appender;
279 
280 private:
281     private enum AutoDetectHeaderResult { none, hasHeader, noHeader };
282 
283     private TsvPrettyOptions _options;
284     private size_t _fileCount = 0;
285     private size_t _dataLineOutputCount = 0;
286     private bool _stillCaching = true;
287     private string _candidateHeaderLine;
288     private auto _lookaheadCache = appender!(string[])();
289     private FieldFormat[] _fieldVector;
290     private AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none;
291 
292     /** Constructor. */
293     this(const TsvPrettyOptions options) @safe pure nothrow @nogc
294     {
295         _options = options;
296         if (options.noHeader && options.lookahead == 0) _stillCaching = false;
297     }
298 
299     invariant
300     {
301         assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader);
302         assert((_options.lookahead == 0 && _lookaheadCache.data.length == 0) ||
303                _lookaheadCache.data.length < _options.lookahead);
304     }
305 
306     /** Called to process a preamble line occurring before the header line or first
307      * line of data.
308      */
309     void processPreambleLine(OutputRange!char outputStream, const char[] line)
310     {
311         if (_fileCount == 0)
312         {
313             put(outputStream, line);
314             put(outputStream, '\n');
315         }
316     }
317 
318     /** Called to process the first line of each file. This enables header processing. */
319     void processFileFirstLine(OutputRange!char outputStream, const char[] line)
320     {
321         import std.conv : to;
322 
323         _fileCount++;
324 
325         if (_options.noHeader)
326         {
327             processLine(outputStream, line);
328         }
329         else if (_options.hasHeader)
330         {
331             if (_fileCount == 1)
332             {
333                 setHeaderLine(line);
334                 if (_options.lookahead == 0) outputLookaheadCache(outputStream);
335             }
336         }
337         else
338         {
339             assert(_options.autoDetectHeader);
340 
341             final switch (_autoDetectHeaderResult)
342             {
343             case AutoDetectHeaderResult.noHeader:
344                 assert(_fileCount > 1);
345                 processLine(outputStream, line);
346                 break;
347 
348             case AutoDetectHeaderResult.hasHeader:
349                 assert(_fileCount > 1);
350                 break;
351 
352             case AutoDetectHeaderResult.none:
353                 if (_fileCount == 1)
354                 {
355                     assert(_candidateHeaderLine.length == 0);
356                     _candidateHeaderLine = line.to!string;
357                 }
358                 else if (_fileCount == 2)
359                 {
360                     if (_candidateHeaderLine == line)
361                     {
362                         _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
363                         setHeaderLine(_candidateHeaderLine);
364 
365                         /* Edge case: First file has only a header line and look-ahead set to zero. */
366                         if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream);
367                     }
368                     else
369                     {
370                         _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
371                         updateFieldFormatsForLine(_candidateHeaderLine);
372                         processLine(outputStream, line);
373                     }
374                 }
375                 break;
376             }
377         }
378     }
379 
380     /** Called to process all lines except for the first line a file. */
381     void processLine(OutputRange!char outputStream, const char[] line)
382     {
383         if (_stillCaching) cacheDataLine(outputStream, line);
384         else outputDataLine(outputStream, line);
385     }
386 
387     /** Called at the end of all processing. This is needed in case the look-ahead cache
388      * is still being filled when input terminates.
389      */
390     void finish(OutputRange!char outputStream)
391     {
392         if (_stillCaching) outputLookaheadCache(outputStream);
393     }
394 
395 private:
396     /* outputLookaheadCache finalizes processing of the lookahead cache. This includes
397      * Setting the type and width of each field, finalizing the auto-detect header
398      * decision, and outputing all lines in the cache.
399      */
400     void outputLookaheadCache(OutputRange!char outputStream)
401     {
402         import std.algorithm : splitter;
403 
404         assert(_stillCaching);
405 
406         if (_options.autoDetectHeader &&
407             _autoDetectHeaderResult == AutoDetectHeaderResult.none &&
408             _candidateHeaderLine.length != 0)
409         {
410             if (candidateHeaderLooksLikeHeader())
411             {
412                 _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader;
413                 setHeaderLine(_candidateHeaderLine);
414             }
415             else
416             {
417                 _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader;
418             }
419         }
420 
421 
422         if (_options.hasHeader ||
423             (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader))
424         {
425             finalizeFieldFormatting();
426             outputHeader(outputStream);
427         }
428         else if (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader &&
429                  _candidateHeaderLine.length != 0)
430         {
431             updateFieldFormatsForLine(_candidateHeaderLine);
432             finalizeFieldFormatting();
433             outputDataLine(outputStream, _candidateHeaderLine);
434         }
435         else
436         {
437             finalizeFieldFormatting();
438         }
439 
440         foreach(line; _lookaheadCache.data) outputDataLine(outputStream, line);
441         _lookaheadCache.clear;
442         _stillCaching = false;
443     }
444 
445     bool candidateHeaderLooksLikeHeader() @safe
446     {
447         import std.algorithm : splitter;
448 
449         /* The candidate header is declared as the header if the look-ahead cache has at least
450          * one numeric field that is text in the candidate header.
451          */
452         foreach(fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate)
453         {
454             auto candidateFieldFormat = FieldFormat(fieldIndex);
455             candidateFieldFormat.updateForFieldValue(fieldValue, _options);
456             if (_fieldVector.length > fieldIndex &&
457                 candidateFieldFormat.fieldType == FieldType.text &&
458                 (_fieldVector[fieldIndex].fieldType == FieldType.integer ||
459                  _fieldVector[fieldIndex].fieldType == FieldType.floatingPoint ||
460                  _fieldVector[fieldIndex].fieldType == FieldType.exponent))
461             {
462                 return true;
463             }
464         }
465 
466         return false;
467     }
468 
469     void setHeaderLine(const char[] line) @safe
470     {
471         import std.algorithm : splitter;
472 
473         foreach(fieldIndex, header; line.splitter(_options.delim).enumerate)
474         {
475             if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
476             assert(_fieldVector.length > fieldIndex);
477             _fieldVector[fieldIndex].setHeader(header);
478         }
479     }
480 
481     void cacheDataLine(OutputRange!char outputStream, const char[] line)
482     {
483         import std.conv : to;
484 
485         assert(_lookaheadCache.data.length < _options.lookahead);
486 
487         _lookaheadCache ~= line.to!string;
488         updateFieldFormatsForLine(line);
489         if (_lookaheadCache.data.length == _options.lookahead) outputLookaheadCache(outputStream);
490     }
491 
492     void updateFieldFormatsForLine(const char[] line) @safe
493     {
494         import std.algorithm : splitter;
495 
496         foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
497         {
498             if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex);
499             assert(_fieldVector.length > fieldIndex);
500             _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
501         }
502 
503     }
504 
505     void finalizeFieldFormatting() @safe pure @nogc nothrow
506     {
507         size_t nextFieldStart = 0;
508         foreach(ref field; _fieldVector)
509         {
510             nextFieldStart = field.finalizeFormatting(nextFieldStart, _options) + _options.spaceBetweenFields;
511         }
512     }
513 
514     void outputHeader(OutputRange!char outputStream)
515     {
516         size_t nextOutputPosition = 0;
517         foreach(fieldIndex, ref field; _fieldVector.enumerate)
518         {
519             size_t spacesNeeded = field.startPosition - nextOutputPosition;
520             put(outputStream, repeat(" ", spacesNeeded));
521             nextOutputPosition += spacesNeeded;
522             nextOutputPosition += field.writeHeader(outputStream, _options);
523         }
524         put(outputStream, '\n');
525 
526         if (_options.underlineHeader)
527         {
528             nextOutputPosition = 0;
529             foreach(fieldIndex, ref field; _fieldVector.enumerate)
530             {
531                 size_t spacesNeeded = field.startPosition - nextOutputPosition;
532                 put(outputStream, repeat(" ", spacesNeeded));
533                 nextOutputPosition += spacesNeeded;
534                 nextOutputPosition += field.writeHeader!(Yes.writeUnderline)(outputStream, _options);
535             }
536             put(outputStream, '\n');
537         }
538     }
539 
540     void outputDataLine(OutputRange!char outputStream, const char[] line)
541     {
542         import std.algorithm : splitter;
543 
544         /* Repeating header option. */
545         if (_options.repeatHeader != 0 && _dataLineOutputCount != 0 &&
546             (_options.hasHeader || (_options.autoDetectHeader &&
547                                     _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)) &&
548             _dataLineOutputCount % _options.repeatHeader == 0)
549         {
550             put(outputStream, '\n');
551             outputHeader(outputStream);
552         }
553 
554         _dataLineOutputCount++;
555 
556         size_t nextOutputPosition = 0;
557         foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate)
558         {
559             if (fieldIndex == _fieldVector.length)
560             {
561                 /* Line is longer than any seen while caching. Add a new FieldFormat entry
562                  * and set the line formatting based on this field value.
563                  */
564                 _fieldVector ~= FieldFormat(fieldIndex);
565                 size_t startPosition = (fieldIndex == 0) ?
566                     0 :
567                     _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields;
568 
569                 _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options);
570                 _fieldVector[fieldIndex].finalizeFormatting(startPosition, _options);
571             }
572 
573             assert(fieldIndex < _fieldVector.length);
574 
575             FieldFormat fieldFormat = _fieldVector[fieldIndex];
576             size_t nextFieldStart = fieldFormat.startPosition;
577             size_t spacesNeeded = (nextOutputPosition < nextFieldStart) ?
578                 nextFieldStart - nextOutputPosition :
579                 (fieldIndex == 0) ? 0 : 1;  // Previous field went long. One space between fields
580 
581             put(outputStream, repeat(" ", spacesNeeded));
582             nextOutputPosition += spacesNeeded;
583             nextOutputPosition += fieldFormat.writeFieldValue(outputStream, nextOutputPosition, fieldValue, _options);
584         }
585         put(outputStream, '\n');
586     }
587 }
588 
/** Field types recognized and tracked by tsv-pretty processing. */
enum FieldType
{
    unknown,
    text,
    integer,
    floatingPoint,
    exponent,
}
591 
/** Field alignments used by tsv-pretty processing. */
enum FieldAlignment
{
    left,
    right,
}
594 
595 /** FieldFormat holds all the formatting info needed to format data values in a specific
596  * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated
597  * during the caching phase. Each FieldFormat instance is part of a vector representing
598  * the full row, so each includes the start position on the line and similar data.
599  *
600  * APIs used during the caching phase to gather field value samples
601  *  - this - Initial construction. Takes the field index.
602  *  - setHeader - Used to set the header text.
603  *  - updateForFieldValue - Used to add the next field value sample.
604  *  - finalizeFormatting - Used at the end of caching to finalize the format choices.
605  *
606  * APIs used after caching is finished (after finalizeFormatting):
607  *  - startPosition - Returns the expected start position for the field.
608  *  - endPosition - Returns the expected end position for the field.
609  *  - writeHeader - Outputs the header, properly aligned.
610  *  - writeFieldValue - Outputs the current field value, properly aligned.
611  */
612 
613 struct FieldFormat
614 {
615 private:
616     size_t _fieldIndex;                  // Zero-based index in the line
617     string _header = "";                 // Original field header
618     size_t _headerPrintWidth = 0;
619     FieldType _type = FieldType.unknown;
620     FieldAlignment _alignment = FieldAlignment.left;
621     size_t _startPosition = 0;
622     size_t _printWidth = 0;
623     size_t _precision = 0;          // Number of digits after the decimal point
624 
625     /* These are used while doing initial type and print format detection. */
626     size_t _minRawPrintWidth = 0;
627     size_t _maxRawPrintWidth = 0;
628     size_t _maxDigitsBeforeDecimal = 0;
629     size_t _maxDigitsAfterDecimal = 0;
630     size_t _maxSignificantDigits = 0;  // Digits to include in exponential notation
631 
632 public:
633 
634     /** Initial construction. Takes a field index. */
635     this(size_t fieldIndex) @safe pure nothrow @nogc
636     {
637         _fieldIndex = fieldIndex;
638     }
639 
640     /** Sets the header text. */
641     void setHeader(const char[] header) @safe
642     {
643         import std.conv : to;
644 
645         _header = header.to!string;
646         _headerPrintWidth = _header.monospacePrintWidth;
647     }
648 
649     /** Returns the expected start position for the field. */
650     size_t startPosition() nothrow pure @safe @property
651     {
652         return _startPosition;
653     }
654 
655     /** Returns the expected end position for the field. */
656     size_t endPosition() nothrow pure @safe @property
657     {
658         return _startPosition + _printWidth;
659     }
660 
661     /** Returns the type of field. */
662     FieldType fieldType() nothrow pure @safe @property
663     {
664         return _type;
665     }
666 
667     /** Writes the field header or underline characters to the output stream.
668      *
669      * The current output position should have been written up to the field's start position,
670      * including any spaces between fields. Unlike data fields, there is no need to correct
671      * for previous fields that have run long. This routine does not output trailing spaces.
672      * This makes it simpler for lines to avoid unnecessary trailing spaces.
673      *
674      * Underlines can either be written the full width of the field or the just under the
675      * text of the header. At present this is a template parameter (compile-time).
676      *
677      * The print width of the output is returned.
678      */
679     size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline,
680                         Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline)
681         (OutputRange!char outputStream, in ref TsvPrettyOptions options)
682     {
683         import std.range : repeat;
684 
685         size_t positionsWritten = 0;
686         if (_headerPrintWidth > 0)
687         {
688             static if (writeUnderline)
689             {
690                 static if (fullWidthUnderline)
691                 {
692                     put(outputStream, repeat("-", _printWidth));
693                     positionsWritten += _printWidth;
694                 }
695                 else  // Underline beneath the header text only
696                 {
697                     if (_alignment == FieldAlignment.right)
698                     {
699                         put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
700                         positionsWritten += _printWidth - _headerPrintWidth;
701                     }
702                     put(outputStream, repeat("-", _headerPrintWidth));
703                     positionsWritten += _headerPrintWidth;
704                 }
705             }
706             else
707             {
708                 if (_alignment == FieldAlignment.right)
709                 {
710                     put(outputStream, repeat(" ", _printWidth - _headerPrintWidth));
711                     positionsWritten += _printWidth - _headerPrintWidth;
712                 }
713                 put(outputStream, _header);
714                 positionsWritten += _headerPrintWidth;
715             }
716         }
717         return positionsWritten;
718     }
719 
720     /** Writes the field value for the current column.
721      *
722      * The caller needs to generate output at least to the column's start position, but
723      * can go beyond if previous fields have run long.
724      *
725      * The field value is aligned properly in the field. Either left aligned (text) or
726      * right aligned (numeric). Floating point fields are both right aligned and
     * decimal point aligned. The number of print positions written (monospace print
     * width, not bytes) is returned. Trailing spaces are not added; the caller must
     * add any necessary trailing spaces prior to printing the next field.
730      */
731     size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition,
732                            const char[] fieldValue, in ref TsvPrettyOptions options)
733     in
734     {
735         assert(currPosition >= _startPosition);   // Caller resposible for advancing to field start position.
736         assert(_type == FieldType.text || _type == FieldType.integer ||
737                _type == FieldType.floatingPoint || _type == FieldType.exponent);
738     }
739     body
740     {
741         import std.algorithm : find, max, min;
742         import std.conv : to, ConvException;
743         import std.format : format;
744 
745         /* Create the print version of the string. Either the raw value or a formatted
746          * version of a float.
747          */
748         string printValue;
749         if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer)
750         {
751             printValue = fieldValue.to!string;
752         }
753         else
754         {
755             assert(options.formatFloats);
756             assert(_type == FieldType.exponent || _type == FieldType.floatingPoint);
757 
758             if (_type == FieldType.exponent)
759             {
760                 printValue = fieldValue.formatExponentValue(_precision);
761             }
762             else
763             {
764                 printValue = fieldValue.formatFloatingPointValue(_precision);
765             }
766         }
767 
768         if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement;
769         size_t printValuePrintWidth = printValue.monospacePrintWidth;
770 
771         /* Calculate leading spaces needed for right alignment. */
772         size_t leadingSpaces = 0;
773         if (_alignment == FieldAlignment.right)
774         {
775             /* Target width adjusts the column width to account for overrun by the previous field. */
776             size_t targetWidth;
777             if (currPosition == _startPosition)
778             {
779                 targetWidth = _printWidth;
780             }
781             else
782             {
783                 size_t startGap = currPosition - _startPosition;
784                 targetWidth = max(printValuePrintWidth,
785                                   startGap < _printWidth ? _printWidth - startGap : 0);
786             }
787 
788             leadingSpaces = (printValuePrintWidth < targetWidth) ?
789                 targetWidth - printValuePrintWidth : 0;
790 
791             /* The above calculation assumes the print value is fully right aligned.
792              * This is not correct when raw value floats are being used rather than
793              * formatted floats, as different values will have different precision.
794              * The next adjustment accounts for this, dropping leading spaces as
795              * needed to align the decimal point. Note that text and exponential
796              * values get aligned strictly against right boundaries.
797              */
798             if (leadingSpaces > 0 && _precision > 0 &&
799                 _type == FieldType.floatingPoint && !options.formatFloats)
800             {
801                 import std.algorithm : canFind, findSplit;
802                 import std.string : isNumeric;
803 
804                 if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E'))
805                 {
806                     size_t decimalAndDigitsLength = printValue.find(".").length;
807                     size_t trailingSpaces =
808                         (decimalAndDigitsLength == 0) ? _precision + 1 :
809                         (decimalAndDigitsLength > _precision) ? 0 :
810                         _precision + 1 - decimalAndDigitsLength;
811 
812                     leadingSpaces = (leadingSpaces > trailingSpaces) ?
813                         leadingSpaces - trailingSpaces : 0;
814                 }
815             }
816         }
817         put(outputStream, repeat(' ', leadingSpaces));
818         put(outputStream, printValue);
819         return printValuePrintWidth + leadingSpaces;
820     }
821 
822     /** Updates type and format given a new field value.
823      *
824      * This is called during look-ahead caching to register a new sample value for the
     * column. The key components updated are the field type and print width.
826      */
827     void updateForFieldValue(const char[] fieldValue, in ref TsvPrettyOptions options) @safe
828     {
829         import std.algorithm : findAmong, findSplit, max, min;
830         import std.conv : to, ConvException;
831         import std.string : isNumeric;
832 
833         size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth;
834         size_t fieldValuePrintWidthWithEmpty =
835             (fieldValuePrintWidth == 0 && options.replaceEmpty) ?
836             options.emptyReplacementPrintWidth :
837             fieldValuePrintWidth;
838 
839         _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty);
840         _minRawPrintWidth = (_minRawPrintWidth == 0) ?
841             fieldValuePrintWidthWithEmpty :
842             min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty);
843 
844         if (_type == FieldType.text)
845         {
846             /* Already text, can't become anything else. */
847         }
848         else if (fieldValuePrintWidth == 0)
849         {
850             /* Don't let an empty field override a numeric field type. */
851         }
852         else if (!fieldValue.isNumeric)
853         {
854             /* Not parsable as a number. Switch from unknown or numeric type to text. */
855             _type = FieldType.text;
856         }
857         else
858         {
859             /* Field type is currently unknown or numeric, and current field parses as numeric.
860              * See if it parses as integer or float. Integers will parse as floats, so try
861              * integer types first.
862              */
863             FieldType parsesAs = FieldType.unknown;
864             long longValue;
865             ulong ulongValue;
866             double doubleValue;
867             try
868             {
869                 longValue = fieldValue.to!long;
870                 parsesAs = FieldType.integer;
871             }
872             catch (ConvException)
873             {
874                 try
875                 {
876                     ulongValue = fieldValue.to!ulong;
877                     parsesAs = FieldType.integer;
878                 }
879                 catch (ConvException)
880                 {
881                     try
882                     {
883                         doubleValue = fieldValue.to!double;
884                         import std.algorithm : findAmong;
885                         parsesAs = (fieldValue.findAmong("eE").length == 0) ?
886                             FieldType.floatingPoint : FieldType.exponent;
887                     }
888                     catch (ConvException)
889                     {
890                         /* Note: This means isNumeric thinks it's a number, but conversions all failed. */
891                         parsesAs = FieldType.text;
892                     }
893                 }
894             }
895 
896             if (parsesAs == FieldType.text)
897             {
898                 /* Not parsable as a number (despite isNumeric result). Switch to text type. */
899                 _type = FieldType.text;
900             }
901             else if (parsesAs == FieldType.exponent)
902             {
903                 /* Exponential notion supersedes both vanilla floats and integers. */
904                 _type = FieldType.exponent;
905                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
906 
907                 if (auto decimalSplit = fieldValue.findSplit("."))
908                 {
909                     auto fromExponent = decimalSplit[2].findAmong("eE");
910                     size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length;
911                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
912                     _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal);
913                 }
914                 else
915                 {
916                     /* Exponent without a decimal point. */
917                     auto fromExponent = fieldValue.findAmong("eE");
918                     assert(fromExponent.length > 0);
919                     size_t numDigits = fieldValue.length - fromExponent.length;
920                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits);
921                 }
922             }
923             else if (parsesAs == FieldType.floatingPoint)
924             {
925                 /* Floating point supercedes integer but not exponential. */
926                 if (_type != FieldType.exponent) _type = FieldType.floatingPoint;
927                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
928 
929                 if (auto decimalSplit = fieldValue.findSplit("."))
930                 {
931                     _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length);
932                     _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length);
933                 }
934             }
935             else
936             {
937                 assert(parsesAs == FieldType.integer);
938                 if (_type != FieldType.floatingPoint) _type = FieldType.integer;
939                 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits);
940                 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length);
941             }
942         }
943     }
944 
945     /** Updates field formatting info based on the current state. It is expected to be
946      * called after adding field entries via updateForFieldValue(). It returns its new
947      * end position.
948      */
949     size_t finalizeFormatting (size_t startPosition, in ref TsvPrettyOptions options) @safe pure @nogc nothrow
950     {
951         import std.algorithm : max, min;
952         _startPosition = startPosition;
953         if (_type == FieldType.unknown) _type = FieldType.text;
954         _alignment = (_type == FieldType.integer || _type == FieldType.floatingPoint
955                       || _type == FieldType.exponent) ?
956             FieldAlignment.right :
957             FieldAlignment.left;
958 
959         if (_type == FieldType.floatingPoint)
960         {
961             size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal);
962             size_t maxValueWidth = _maxDigitsBeforeDecimal + precision;
963             if (precision > 0) maxValueWidth++;  // Account for the decimal point.
964             _printWidth = max(1, _headerPrintWidth, maxValueWidth);
965             _precision = precision;
966         }
967         else if (_type == FieldType.exponent)
968         {
969             size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0;
970             _precision = min(options.floatPrecision, maxPrecision);
971 
972             size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7;
973             _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth);
974         }
975         else if (_type == FieldType.integer)
976         {
977             _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth);
978             _precision = 0;
979         }
980         else
981         {
982             _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth,
983                               min(options.maxFieldPrintWidth, _maxRawPrintWidth));
984             _precision = 0;
985         }
986 
987         return _startPosition + _printWidth;
988     }
989 }
990 
991 /** formatFloatingPointValue returns the printed representation of a raw value
992  * formatted as a fixed precision floating number. This includes zero padding or
993  * truncation of trailing digits as necessary to meet the desired precision.
994  *
995  * If the value cannot be interpreted as a double then the raw value is returned.
 * Similarly, values in exponential notation are returned without reformatting.
997  *
998  * This routine is used to format values in columns identified as floating point.
999  */
string formatFloatingPointValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    /* Exponential notation is passed through untouched. */
    if (value.canFind!(x => x == 'e' || x == 'E')) return value.to!string;

    try
    {
        double number = value.to!double;
        if (!number.isFinite) return value.to!string;  // NaN or Infinity

        size_t sourceDigits = value.precisionDigits;

        /* The source has at least the requested digits: let format round to the target. */
        if (sourceDigits >= precision) return format("%.*f", precision, number);

        /* The source has fewer digits than requested. Format at the source precision
         * and pad with zeros, adding the decimal point first if the source had none.
         */
        string formatted = format("%.*f", sourceDigits, number);
        if (sourceDigits == 0) formatted ~= ".";
        return formatted ~ repeat("0", precision - sourceDigits).join;
    }
    catch (ConvException)
    {
        /* Not convertible: hand back a copy of the raw value. */
        return value.to!string;
    }
}
1043 
@safe unittest
{
    /* One row per check: raw input, requested precision, expected output. */
    static struct Case { string input; size_t precision; string expected; }

    immutable Case[] cases = [
        // Inputs that are not finite numbers come back unchanged.
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),

        // Zero padding, truncation and rounding of plain decimal values.
        Case("0", 0, "0"),
        Case("1", 0, "1"),
        Case("1.", 0, "1"),
        Case("1", 3, "1.000"),
        Case("1000", 3, "1000.000"),
        Case("1000.001", 5, "1000.00100"),
        Case("1000.001", 3, "1000.001"),
        Case("1000.001", 2, "1000.00"),
        Case("1000.006", 2, "1000.01"),
        Case("-0.1", 1, "-0.1"),
        Case("-0.1", 3, "-0.100"),
        Case("-0.001", 3, "-0.001"),
        Case("-0.006", 2, "-0.01"),
        Case("-0.001", 1, "-0.0"),
        Case("-0.001", 0, "-0"),

        // Exponential notation is returned without reformatting.
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0.00e+00"),
        Case("1e+06", 1, "1e+06"),
        Case("1e+06", 2, "1e+06"),
        Case("1E-06", 1, "1E-06"),
        Case("1.1E+6", 2, "1.1E+6"),
        Case("1.1E+100", 2, "1.1E+100"),
    ];

    foreach (c; cases)
    {
        assert(c.input.formatFloatingPointValue(c.precision) == c.expected);
    }
}
1073 
1074 /** formatExponentValue returns the printed representation of a raw value formatted
1075  * using exponential notation and a specific precision. If the value cannot be interpreted
 * as a double then a copy of the original value is returned.
1077  *
1078  * This routine is used to format values in columns identified as having exponent format.
1079  */
string formatExponentValue(const char[] value, size_t precision) @safe
{
    import std.algorithm : canFind, findSplit;
    import std.array : join;
    import std.conv : to, ConvException;
    import std.format : format;
    import std.math : isFinite;
    import std.range : repeat;

    try
    {
        double number = value.to!double;
        if (!number.isFinite) return value.to!string;  // NaN or Infinity

        /* Digits after the decimal point that the source value can actually supply. */
        size_t sigDigits = value.significantDigits;
        size_t sourcePrecision = (sigDigits > 0) ? sigDigits - 1 : 0;

        /* Enough source digits: format directly at the requested precision. */
        if (sourcePrecision >= precision) return format("%.*e", precision, number);

        /* Too few source digits: format at the source precision, then insert zeros
         * between the mantissa and the exponent to reach the requested precision.
         */
        auto parts = format("%.*e", sourcePrecision, number).findSplit("e");   // Same exponent case as the format call.
        string mantissa = parts[0];
        if (sourcePrecision == 0)
        {
            assert(precision != 0);
            assert(!mantissa.canFind("."));
            mantissa ~= ".";
        }
        return mantissa ~ repeat("0", precision - sourcePrecision).join ~ parts[1] ~ parts[2];
    }
    catch (ConvException)
    {
        /* Not convertible: hand back a copy of the raw value. */
        return value.to!string;
    }
}
1123 
@safe unittest
{
    /* One row per check: raw input, requested precision, expected output. */
    static struct Case { string input; size_t precision; string expected; }

    immutable Case[] cases = [
        // Inputs that are not finite numbers come back unchanged.
        Case("", 3, ""),
        Case(" ", 3, " "),
        Case("abc", 3, "abc"),
        Case("nan", 3, "nan"),

        // Plain decimal values are converted to exponential notation.
        Case("0", 0, "0e+00"),
        Case("1", 0, "1e+00"),
        Case("1.", 0, "1e+00"),
        Case("1", 3, "1.000e+00"),
        Case("1000", 3, "1.000e+03"),
        Case("1000.001", 5, "1.00000e+03"),
        Case("1000.001", 3, "1.000e+03"),
        Case("1000.001", 6, "1.000001e+03"),
        Case("1000.006", 5, "1.00001e+03"),
        Case("-0.1", 1, "-1.0e-01"),
        Case("-0.1", 3, "-1.000e-01"),
        Case("-0.001", 3, "-1.000e-03"),
        Case("-0.001", 1, "-1.0e-03"),
        Case("-0.001", 0, "-1e-03"),

        // Values already in exponential notation are normalized to the precision.
        Case("0e+00", 0, "0e+00"),
        Case("0.00e+00", 0, "0e+00"),
        Case("1e+06", 1, "1.0e+06"),
        Case("1e+06", 2, "1.00e+06"),
        Case("1.0001e+06", 1, "1.0e+06"),
        Case("1.0001e+06", 5, "1.00010e+06"),
    ];

    foreach (c; cases)
    {
        assert(c.input.formatExponentValue(c.precision) == c.expected);
    }
}
1151 
1152 /** Returns the number of significant digits in a numeric string.
1153  *
1154  * Significant digits are those needed to represent a number in exponential notation.
1155  * Examples:
1156  *   22.345 - 5 digits
1157  *   10.010 - 4 digits
1158  *   0.0032 - 2 digits
1159  */
size_t significantDigits(const char[] numericString) @safe pure
{
    import std.algorithm : canFind, find, findAmong, stripRight;
    import std.ascii : isDigit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    /* Counts the digits needed to write the number's mantissa in exponential
     * notation. Leading zeros never count. Trailing zeros count only when the
     * number has no decimal point. Zero itself has one significant digit.
     * NaN and infinity have none (zero is returned).
     *
     * Throws a ConvException if the string cannot be converted to a double.
     */
    assert (numericString.isNumeric);

    if (!numericString.to!double.isFinite) return 0;

    /* Separate the mantissa from the exponent first. Digits in the exponent are
     * never significant, even when the mantissa consists only of zeros
     * (e.g. "0.0e+15").
     */
    auto exponentPart = numericString.findAmong("eE");
    auto mantissa = numericString[0 .. $ - exponentPart.length];

    /* Record whether the mantissa has a decimal point before leading zeros are
     * dropped, as that step may also drop the point itself (e.g. "0.10").
     */
    const bool hasDecimalPoint = mantissa.canFind('.');

    /* Skip sign, leading zeros, and any decimal point that precedes the first
     * non-zero digit.
     */
    auto digitsPart = mantissa.find!(x => x.isDigit && x != '0');

    /* Zeros trailing the fractional part carry no information. */
    if (hasDecimalPoint) digitsPart = digitsPart.stripRight('0');

    /* What remains is all digits, apart from possibly one decimal point. */
    size_t numDigits = digitsPart.length;
    if (digitsPart.canFind('.')) --numDigits;

    return (numDigits == 0) ? 1 : numDigits;
}

@safe pure unittest
{
    assert("0".significantDigits == 1);
    assert("10".significantDigits == 2);
    assert("0.0".significantDigits == 1);
    assert("-10.0".significantDigits == 2);
    assert("-.01".significantDigits == 1);
    assert("-.5401".significantDigits == 4);
    assert("1010.010".significantDigits == 6);
    assert("0.0003003".significantDigits == 4);
    assert("6e+06".significantDigits == 1);
    assert("6.0e+06".significantDigits == 1);
    assert("6.5e+06".significantDigits == 2);
    assert("6.005e+06".significantDigits == 4);

    /* Exponent digits are not significant, even with an all-zero mantissa. */
    assert("0e+00".significantDigits == 1);
    assert("0e+100".significantDigits == 1);
    assert("0.0e+15".significantDigits == 1);
    assert("0.00E-12".significantDigits == 1);

    /* Trailing fractional zeros are dropped even when the integer part is zero. */
    assert("0.10".significantDigits == 1);
    assert("0.0030".significantDigits == 1);
    assert("-0.2500".significantDigits == 2);

    /* Trailing zeros of a value without a decimal point still count. */
    assert("100".significantDigits == 3);
}
1207 
1208 /** Returns the number of digits to the right of the decimal point in a numeric string.
1209  * This routine includes trailing zeros in the count.
1210  */
size_t precisionDigits(const char[] numericString) @safe pure
{
    import std.algorithm : findAmong, findSplit;
    import std.conv : to;
    import std.math : isFinite;
    import std.string : isNumeric;

    assert (numericString.isNumeric);

    /* NaN and infinity have no fractional digits. */
    if (!numericString.to!double.isFinite) return 0;

    /* No decimal point means no fractional digits. */
    auto pieces = numericString.findSplit(".");
    if (pieces[1].length == 0) return 0;

    /* Everything after the decimal point counts, up to the start of any exponent. */
    auto afterPoint = pieces[2];
    return afterPoint.length - afterPoint.findAmong("eE").length;
}
1232 
@safe pure unittest
{
    /* Numeric text paired with the expected count of digits after the decimal point. */
    static struct Case { string text; size_t digits; }

    immutable Case[] cases = [
        Case("0", 0),
        Case("10", 0),
        Case("0.0", 1),
        Case("-10.0", 1),
        Case("-.01", 2),
        Case("-.5401", 4),
    ];

    foreach (c; cases)
    {
        assert(c.text.precisionDigits == c.digits);
    }
}
1242 
1243 /** Calculates the expected print width of a string in monospace (fixed-width) fonts.
1244  */
size_t monospacePrintWidth(const char[] str) @safe nothrow
{
    /* Returns true for code points that occupy two cells in a fixed-width terminal.
     * The ranges follow the East Asian wide/fullwidth blocks used by common wcwidth
     * implementations. They are a superset of the U+3000 - U+9FFF range this routine
     * originally recognized, so previously measured text keeps its width.
     */
    static bool isWide(dchar c) @safe pure nothrow @nogc
    {
        return
            (c >= 0x1100 && c <= 0x115F) ||     // Hangul Jamo initial consonants
            (c >= 0x2E80 && c <= 0xA4CF) ||     // CJK radicals through Yi, incl. kana and unified ideographs
            (c >= 0xAC00 && c <= 0xD7A3) ||     // Hangul syllables
            (c >= 0xF900 && c <= 0xFAFF) ||     // CJK compatibility ideographs
            (c >= 0xFE30 && c <= 0xFE6F) ||     // CJK compatibility forms and small form variants
            (c >= 0xFF00 && c <= 0xFF60) ||     // Fullwidth forms
            (c >= 0xFFE0 && c <= 0xFFE6) ||     // Fullwidth signs
            (c >= 0x20000 && c <= 0x2FFFD) ||   // Supplementary ideographic plane
            (c >= 0x30000 && c <= 0x3FFFD);     // Tertiary ideographic plane
    }

    import std.uni : byGrapheme;

    /* Each grapheme counts as one cell, or two when its first code point is wide. */
    size_t width = 0;
    try foreach (g; str.byGrapheme) width += isWide(g[0]) ? 2 : 1;
    catch (Exception) width = str.length;  // Invalid utf-8 sequence. Catch avoids program failure.

    return width;
}

unittest
{
    assert("".monospacePrintWidth == 0);
    assert(" ".monospacePrintWidth == 1);
    assert("abc".monospacePrintWidth == 3);
    assert("林檎".monospacePrintWidth == 4);
    assert("æble".monospacePrintWidth == 4);
    assert("ვაშლი".monospacePrintWidth == 5);
    assert("größten".monospacePrintWidth == 7);

    /* Wide characters outside the U+3000 - U+9FFF range. */
    assert("한글".monospacePrintWidth == 4);       // Hangul syllables
    assert("ＡＢＣ".monospacePrintWidth == 6);     // Fullwidth Latin letters
    assert("a한b".monospacePrintWidth == 4);       // Mixed narrow and wide
}