1 /** 2 Command line tool that prints TSV data aligned for easier reading on consoles 3 and traditional command-line environments. 4 5 Copyright (c) 2017-2020, eBay Inc. 6 Initially written by Jon Degenhardt 7 8 License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt) 9 */ 10 module tsv_utils.tsv_pretty; 11 12 import std.range; 13 import std.stdio; 14 import std.typecons : Flag, Yes, No, tuple; 15 16 static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ]; 17 18 version(unittest) 19 { 20 // When running unit tests, use main from -main compiler switch. 21 } 22 else 23 { 24 /** Main program. Invokes command line arg processing and tsv-pretty to perform 25 * the real work. Any errors are caught and reported. 26 */ 27 int main(string[] cmdArgs) 28 { 29 /* When running in DMD code coverage mode, turn on report merging. */ 30 version(D_Coverage) version(DigitalMars) 31 { 32 import core.runtime : dmd_coverSetMerge; 33 dmd_coverSetMerge(true); 34 } 35 36 TsvPrettyOptions options; 37 auto r = options.processArgs(cmdArgs); 38 if (!r[0]) return r[1]; 39 try tsvPretty(options, cmdArgs[1 .. $]); 40 catch (Exception exc) 41 { 42 stderr.writefln("Error [%s]: %s", options.programName, exc.msg); 43 return 1; 44 } 45 return 0; 46 } 47 } 48 49 auto helpTextVerbose = q"EOS 50 Synopsis: tsv-pretty [options] [file...] 51 52 tsv-pretty outputs TSV data in a format intended to be more human readable when 53 working on the command line. This is done primarily by lining up data into 54 fixed-width columns. Text is left aligned, numbers are right aligned. Floating 55 points numbers are aligned on the decimal point when feasible. 56 57 Processing begins by reading the initial set of lines into memory to determine 58 the field widths and data types of each column. This look-ahead buffer is used 59 for header detection as well. Output begins after this processing is complete. 60 61 By default, only the alignment is changed, the actual values are not modified. 62 Several of the formatting options do modify the values. 63 64 Features: 65 66 * Floating point numbers: Floats can be printed in fixed-width precision, using 67 the same precision for all floats in a column. This makes then line up nicely. 68 Precision is determined by values seen during look-ahead processing. The max 69 precision defaults to 9, this can be changed when smaller or larger values are 70 desired. See the '--f|format-floats' and '--p|precision' options. 71 72 * Header lines: Headers are detected automatically when possible. This can be 73 overridden when automatic detection doesn't work as desired. Headers can be 74 underlined and repeated at regular intervals. 75 76 * Missing values: A substitute value can be used for empty fields. This is often 77 less confusing than spaces. See '--e|replace-empty' and '--E|empty-replacement'. 78 79 * Exponential notion: As part float formatting, '--f|format-floats' re-formats 80 columns where exponential notation is found so all the values in the column 81 are displayed using exponential notation with the same precision. 82 83 * Preamble: A number of initial lines can be designated as a preamble and output 84 unchanged. The preamble is before the header, if a header is present. Preamble 85 lines can be auto-detected via the heuristic that they lack field delimiters. 86 This works well when the field delimiter is a TAB. 87 88 * Fonts: Fixed-width fonts are assumed. CJK characters are assumed to be double 89 width. This is not always correct, but works well in most cases. 90 91 Options: 92 EOS"; 93 94 auto helpText = q"EOS 95 Synopsis: tsv-pretty [options] [file...] 96 97 tsv-pretty outputs TSV data in a more human readable format. This is done by lining 98 up data into fixed-width columns. Text is left aligned, numbers are right aligned. 99 Floating points numbers are aligned on the decimal point when feasible. 100 101 Options: 102 EOS"; 103 104 /** TsvPrettyOptions is used to process and store command line options. */ 105 struct TsvPrettyOptions 106 { 107 string programName; 108 bool helpVerbose = false; // --help-verbose 109 bool hasHeader = false; // --H|header (Note: Default false assumed by validation code) 110 bool autoDetectHeader = true; // Derived (Note: Default true assumed by validation code) 111 bool noHeader = false; // --x|no-header (Note: Default false assumed by validation code) 112 size_t lookahead = 1000; // --l|lookahead 113 size_t repeatHeader = 0; // --r|repeat-header num (zero means no repeat) 114 bool underlineHeader = false; // --u|underline-header 115 bool formatFloats = false; // --f|format-floats 116 size_t floatPrecision = 9; // --p|precision num (max precision when formatting floats.) 117 bool replaceEmpty = false; // --e|replace-empty 118 string emptyReplacement = ""; // --E|empty-replacement 119 size_t emptyReplacementPrintWidth = 0; // Derived 120 char delim = '\t'; // --d|delimiter 121 size_t spaceBetweenFields = 2; // --s|space-between-fields num 122 size_t maxFieldPrintWidth = 40; // --m|max-text-width num; Max width for variable width text fields. 123 bool autoDetectPreamble = false; // --a|auto-preamble 124 size_t preambleLines = 0; // --b|preamble; Number of preamble lines. 125 bool versionWanted = false; // --V|version 126 127 /* Returns a tuple. First value is true if command line arguments were successfully 128 * processed and execution should continue, or false if an error occurred or the user 129 * asked for help. If false, the second value is the appropriate exit code (0 or 1). 130 * 131 * Returning true (execution continues) means args have been validated and derived 132 * values calculated. In addition, field indices have been converted to zero-based. 133 * If the whole line is the key, the individual fields list will be cleared. 134 */ 135 auto processArgs (ref string[] cmdArgs) 136 { 137 import std.algorithm : any, each; 138 import std.getopt; 139 import std.path : baseName, stripExtension; 140 141 programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name"; 142 143 try 144 { 145 arraySep = ","; // Use comma to separate values in command line options 146 auto r = getopt( 147 cmdArgs, 148 "help-verbose", " Print full help.", &helpVerbose, 149 std.getopt.config.caseSensitive, 150 "H|header", " Treat the first line of each file as a header.", &hasHeader, 151 std.getopt.config.caseInsensitive, 152 "x|no-header", " Assume no header. Turns off automatic header detection.", &noHeader, 153 "l|lookahead", "NUM Lines to read to interpret data before generating output. Default: 1000", &lookahead, 154 155 "r|repeat-header", "NUM Lines to print before repeating the header. Default: No repeating header", &repeatHeader, 156 157 "u|underline-header", " Underline the header.", &underlineHeader, 158 "f|format-floats", " Format floats for better readability. Default: No", &formatFloats, 159 "p|precision", "NUM Max floating point precision. Implies --format-floats. Default: 9", &floatPrecisionOptionHandler, 160 std.getopt.config.caseSensitive, 161 "e|replace-empty", " Replace empty fields with '--'.", &replaceEmpty, 162 "E|empty-replacement", "STR Replace empty fields with a string.", &emptyReplacement, 163 std.getopt.config.caseInsensitive, 164 "d|delimiter", "CHR Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim, 165 "s|space-between-fields", "NUM Spaces between each field (Default: 2)", &spaceBetweenFields, 166 "m|max-text-width", "NUM Max reserved field width for variable width text fields. Default: 40", &maxFieldPrintWidth, 167 "a|auto-preamble", " Treat initial lines in a file as a preamble if the line contains no field delimiters.", &autoDetectPreamble, 168 "b|preamble", "NUM Treat the first NUM lines as a preamble and output them unchanged.", &preambleLines, 169 std.getopt.config.caseSensitive, 170 "V|version", " Print version information and exit.", &versionWanted, 171 std.getopt.config.caseInsensitive, 172 ); 173 174 if (r.helpWanted) 175 { 176 defaultGetoptPrinter(helpText, r.options); 177 return tuple(false, 0); 178 } 179 else if (helpVerbose) 180 { 181 defaultGetoptPrinter(helpTextVerbose, r.options); 182 return tuple(false, 0); 183 } 184 else if (versionWanted) 185 { 186 import tsv_utils.common.tsvutils_version; 187 writeln(tsvutilsVersionNotice("tsv-pretty")); 188 return tuple(false, 0); 189 } 190 191 /* Validation and derivations. */ 192 if (noHeader && hasHeader) throw new Exception("Cannot specify both --H|header and --x|no-header."); 193 194 if (noHeader || hasHeader) autoDetectHeader = false; 195 196 /* Zero look-ahead has limited utility unless the first line is known to 197 * be a header. Good chance the user will get an unintended behavior. 198 */ 199 if (lookahead == 0 && autoDetectHeader) 200 { 201 assert (!noHeader && !hasHeader); 202 throw new Exception("Cannot auto-detect header with zero look-ahead. Specify either '--H|header' or '--x|no-header' when using '--l|lookahead 0'."); 203 } 204 205 if (autoDetectPreamble && preambleLines != 0) 206 { 207 throw new Exception("Do not use '--b|preamble NUM' and '--a|auto-preamble' together. ('--b|preamble 0' is okay.)"); 208 } 209 210 if (emptyReplacement.length != 0) replaceEmpty = true; 211 else if (replaceEmpty) emptyReplacement = "--"; 212 213 if (emptyReplacement.length != 0) 214 { 215 emptyReplacementPrintWidth = emptyReplacement.monospacePrintWidth; 216 } 217 } 218 catch (Exception exc) 219 { 220 stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg); 221 return tuple(false, 1); 222 } 223 return tuple(true, 0); 224 } 225 226 /* Option handler for --p|precision. It also sets --f|format-floats. */ 227 private void floatPrecisionOptionHandler(string option, string optionVal) @safe pure 228 { 229 import std.conv : to; 230 floatPrecision = optionVal.to!size_t; 231 formatFloats = true; 232 } 233 } 234 235 /** tsvPretty is the main loop, operating on input files and passing control to a 236 * TSVPrettyProccessor instance. 237 * 238 * This separates physical I/O sources and sinks from the underlying processing 239 * algorithm, which operates on generic ranges. A lockingTextWriter is created and 240 * released on every input line. This has effect flushing standard output every line, 241 * desirable in command line tools. 242 * 243 * This routine also handles identification of preamble lines. This is mostly for 244 * simplification of the TsvPrettyProcessor code. 245 */ 246 void tsvPretty(const ref TsvPrettyOptions options, const string[] files) 247 { 248 import std.algorithm : canFind; 249 250 auto firstNonPreambleLine = options.preambleLines + 1; 251 auto tpp = TsvPrettyProcessor(options); 252 foreach (filename; (files.length > 0) ? files : ["-"]) 253 { 254 bool autoDetectPreambleDone = false; 255 auto inputStream = (filename == "-") ? stdin : filename.File(); 256 foreach (lineNum, line; inputStream.byLine.enumerate(1)) 257 { 258 bool isPreambleLine = false; 259 bool isFirstNonPreambleLine = false; 260 261 if (options.autoDetectPreamble) 262 { 263 if (!autoDetectPreambleDone) 264 { 265 if (line.canFind(options.delim)) 266 { 267 autoDetectPreambleDone = true; 268 isFirstNonPreambleLine = true; 269 } 270 else 271 { 272 isPreambleLine = true; 273 } 274 } 275 } 276 else if (lineNum < firstNonPreambleLine) 277 { 278 isPreambleLine = true; 279 } 280 else if (lineNum == firstNonPreambleLine) 281 { 282 isFirstNonPreambleLine = true; 283 } 284 285 286 if (isPreambleLine) 287 { 288 tpp.processPreambleLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line); 289 } 290 else if (isFirstNonPreambleLine) 291 { 292 tpp.processFileFirstLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line); 293 } 294 else 295 { 296 tpp.processLine(outputRangeObject!(char, char[])(stdout.lockingTextWriter), line); 297 } 298 } 299 } 300 tpp.finish(outputRangeObject!(char, char[])(stdout.lockingTextWriter)); 301 } 302 303 /** TsvPrettyProcessor maintains state of processing and exposes operations for 304 * processing individual input lines. 305 * 306 * TsvPrettyProcessor knows that input is file-based, but doesn't deal with actual 307 * files or reading lines from input. That is the job of the caller. Output is 308 * written to an output range. The caller is expected to pass each line to in the 309 * order received, that is an assumption built-into the its processing. 310 * 311 * In addition to the constructor, there are four API methods: 312 * - processPreambleLine - Called to process a preamble line occurring before 313 * the header line or first line of data. 314 * - processFileFirstLine - Called to process the first line of each file. This 315 * enables header processing. 316 * - processLine - Called to process all lines except for the first line a file. 317 * - finish - Called at the end of all processing. This is needed in case the 318 * look-ahead cache is still being filled when input terminates. 319 */ 320 321 struct TsvPrettyProcessor 322 { 323 import std.array : appender; 324 325 private: 326 private enum AutoDetectHeaderResult { none, hasHeader, noHeader }; 327 328 private TsvPrettyOptions _options; 329 private size_t _fileCount = 0; 330 private size_t _dataLineOutputCount = 0; 331 private bool _stillCaching = true; 332 private string _candidateHeaderLine; 333 private auto _lookaheadCache = appender!(string[])(); 334 private FieldFormat[] _fieldVector; 335 private AutoDetectHeaderResult _autoDetectHeaderResult = AutoDetectHeaderResult.none; 336 337 /** Constructor. */ 338 this(const TsvPrettyOptions options) @safe pure nothrow @nogc 339 { 340 _options = options; 341 if (options.noHeader && options.lookahead == 0) _stillCaching = false; 342 } 343 344 invariant 345 { 346 assert(_options.hasHeader || _options.noHeader || _options.autoDetectHeader); 347 assert((_options.lookahead == 0 && _lookaheadCache.data.length == 0) || 348 _lookaheadCache.data.length < _options.lookahead); 349 } 350 351 /** Called to process a preamble line occurring before the header line or first 352 * line of data. 353 */ 354 void processPreambleLine(OutputRange!char outputStream, const char[] line) 355 { 356 if (_fileCount == 0) 357 { 358 put(outputStream, line); 359 put(outputStream, '\n'); 360 } 361 } 362 363 /** Called to process the first line of each file. This enables header processing. */ 364 void processFileFirstLine(OutputRange!char outputStream, const char[] line) 365 { 366 import std.conv : to; 367 368 _fileCount++; 369 370 if (_options.noHeader) 371 { 372 processLine(outputStream, line); 373 } 374 else if (_options.hasHeader) 375 { 376 if (_fileCount == 1) 377 { 378 setHeaderLine(line); 379 if (_options.lookahead == 0) outputLookaheadCache(outputStream); 380 } 381 } 382 else 383 { 384 assert(_options.autoDetectHeader); 385 386 final switch (_autoDetectHeaderResult) 387 { 388 case AutoDetectHeaderResult.noHeader: 389 assert(_fileCount > 1); 390 processLine(outputStream, line); 391 break; 392 393 case AutoDetectHeaderResult.hasHeader: 394 assert(_fileCount > 1); 395 break; 396 397 case AutoDetectHeaderResult.none: 398 if (_fileCount == 1) 399 { 400 assert(_candidateHeaderLine.length == 0); 401 _candidateHeaderLine = line.to!string; 402 } 403 else if (_fileCount == 2) 404 { 405 if (_candidateHeaderLine == line) 406 { 407 _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader; 408 setHeaderLine(_candidateHeaderLine); 409 410 /* Edge case: First file has only a header line and look-ahead set to zero. */ 411 if (_stillCaching && _options.lookahead == 0) outputLookaheadCache(outputStream); 412 } 413 else 414 { 415 _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader; 416 updateFieldFormatsForLine(_candidateHeaderLine); 417 processLine(outputStream, line); 418 } 419 } 420 break; 421 } 422 } 423 } 424 425 /** Called to process all lines except for the first line a file. */ 426 void processLine(OutputRange!char outputStream, const char[] line) 427 { 428 if (_stillCaching) cacheDataLine(outputStream, line); 429 else outputDataLine(outputStream, line); 430 } 431 432 /** Called at the end of all processing. This is needed in case the look-ahead cache 433 * is still being filled when input terminates. 434 */ 435 void finish(OutputRange!char outputStream) 436 { 437 if (_stillCaching) outputLookaheadCache(outputStream); 438 } 439 440 private: 441 /* outputLookaheadCache finalizes processing of the lookahead cache. This includes 442 * Setting the type and width of each field, finalizing the auto-detect header 443 * decision, and outputing all lines in the cache. 444 */ 445 void outputLookaheadCache(OutputRange!char outputStream) 446 { 447 import std.algorithm : splitter; 448 449 assert(_stillCaching); 450 451 if (_options.autoDetectHeader && 452 _autoDetectHeaderResult == AutoDetectHeaderResult.none && 453 _candidateHeaderLine.length != 0) 454 { 455 if (candidateHeaderLooksLikeHeader()) 456 { 457 _autoDetectHeaderResult = AutoDetectHeaderResult.hasHeader; 458 setHeaderLine(_candidateHeaderLine); 459 } 460 else 461 { 462 _autoDetectHeaderResult = AutoDetectHeaderResult.noHeader; 463 } 464 } 465 466 467 if (_options.hasHeader || 468 (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)) 469 { 470 finalizeFieldFormatting(); 471 outputHeader(outputStream); 472 } 473 else if (_options.autoDetectHeader && _autoDetectHeaderResult == AutoDetectHeaderResult.noHeader && 474 _candidateHeaderLine.length != 0) 475 { 476 updateFieldFormatsForLine(_candidateHeaderLine); 477 finalizeFieldFormatting(); 478 outputDataLine(outputStream, _candidateHeaderLine); 479 } 480 else 481 { 482 finalizeFieldFormatting(); 483 } 484 485 foreach(line; _lookaheadCache.data) outputDataLine(outputStream, line); 486 _lookaheadCache.clear; 487 _stillCaching = false; 488 } 489 490 bool candidateHeaderLooksLikeHeader() @safe 491 { 492 import std.algorithm : splitter; 493 494 /* The candidate header is declared as the header if the look-ahead cache has at least 495 * one numeric field that is text in the candidate header. 496 */ 497 foreach(fieldIndex, fieldValue; _candidateHeaderLine.splitter(_options.delim).enumerate) 498 { 499 auto candidateFieldFormat = FieldFormat(fieldIndex); 500 candidateFieldFormat.updateForFieldValue(fieldValue, _options); 501 if (_fieldVector.length > fieldIndex && 502 candidateFieldFormat.fieldType == FieldType.text && 503 (_fieldVector[fieldIndex].fieldType == FieldType.integer || 504 _fieldVector[fieldIndex].fieldType == FieldType.floatingPoint || 505 _fieldVector[fieldIndex].fieldType == FieldType.exponent)) 506 { 507 return true; 508 } 509 } 510 511 return false; 512 } 513 514 void setHeaderLine(const char[] line) @safe 515 { 516 import std.algorithm : splitter; 517 518 foreach(fieldIndex, header; line.splitter(_options.delim).enumerate) 519 { 520 if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex); 521 assert(_fieldVector.length > fieldIndex); 522 _fieldVector[fieldIndex].setHeader(header); 523 } 524 } 525 526 void cacheDataLine(OutputRange!char outputStream, const char[] line) 527 { 528 import std.conv : to; 529 530 assert(_lookaheadCache.data.length < _options.lookahead); 531 532 _lookaheadCache ~= line.to!string; 533 updateFieldFormatsForLine(line); 534 if (_lookaheadCache.data.length == _options.lookahead) outputLookaheadCache(outputStream); 535 } 536 537 void updateFieldFormatsForLine(const char[] line) @safe 538 { 539 import std.algorithm : splitter; 540 541 foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate) 542 { 543 if (_fieldVector.length == fieldIndex) _fieldVector ~= FieldFormat(fieldIndex); 544 assert(_fieldVector.length > fieldIndex); 545 _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options); 546 } 547 548 } 549 550 void finalizeFieldFormatting() @safe pure @nogc nothrow 551 { 552 size_t nextFieldStart = 0; 553 foreach(ref field; _fieldVector) 554 { 555 nextFieldStart = field.finalizeFormatting(nextFieldStart, _options) + _options.spaceBetweenFields; 556 } 557 } 558 559 void outputHeader(OutputRange!char outputStream) 560 { 561 size_t nextOutputPosition = 0; 562 foreach(fieldIndex, ref field; _fieldVector.enumerate) 563 { 564 size_t spacesNeeded = field.startPosition - nextOutputPosition; 565 put(outputStream, repeat(" ", spacesNeeded)); 566 nextOutputPosition += spacesNeeded; 567 nextOutputPosition += field.writeHeader(outputStream, _options); 568 } 569 put(outputStream, '\n'); 570 571 if (_options.underlineHeader) 572 { 573 nextOutputPosition = 0; 574 foreach(fieldIndex, ref field; _fieldVector.enumerate) 575 { 576 size_t spacesNeeded = field.startPosition - nextOutputPosition; 577 put(outputStream, repeat(" ", spacesNeeded)); 578 nextOutputPosition += spacesNeeded; 579 nextOutputPosition += field.writeHeader!(Yes.writeUnderline)(outputStream, _options); 580 } 581 put(outputStream, '\n'); 582 } 583 } 584 585 void outputDataLine(OutputRange!char outputStream, const char[] line) 586 { 587 import std.algorithm : splitter; 588 589 /* Repeating header option. */ 590 if (_options.repeatHeader != 0 && _dataLineOutputCount != 0 && 591 (_options.hasHeader || (_options.autoDetectHeader && 592 _autoDetectHeaderResult == AutoDetectHeaderResult.hasHeader)) && 593 _dataLineOutputCount % _options.repeatHeader == 0) 594 { 595 put(outputStream, '\n'); 596 outputHeader(outputStream); 597 } 598 599 _dataLineOutputCount++; 600 601 size_t nextOutputPosition = 0; 602 foreach(fieldIndex, fieldValue; line.splitter(_options.delim).enumerate) 603 { 604 if (fieldIndex == _fieldVector.length) 605 { 606 /* Line is longer than any seen while caching. Add a new FieldFormat entry 607 * and set the line formatting based on this field value. 608 */ 609 _fieldVector ~= FieldFormat(fieldIndex); 610 size_t startPosition = (fieldIndex == 0) ? 611 0 : 612 _fieldVector[fieldIndex - 1].endPosition + _options.spaceBetweenFields; 613 614 _fieldVector[fieldIndex].updateForFieldValue(fieldValue, _options); 615 _fieldVector[fieldIndex].finalizeFormatting(startPosition, _options); 616 } 617 618 assert(fieldIndex < _fieldVector.length); 619 620 FieldFormat fieldFormat = _fieldVector[fieldIndex]; 621 size_t nextFieldStart = fieldFormat.startPosition; 622 size_t spacesNeeded = (nextOutputPosition < nextFieldStart) ? 623 nextFieldStart - nextOutputPosition : 624 (fieldIndex == 0) ? 0 : 1; // Previous field went long. One space between fields 625 626 put(outputStream, repeat(" ", spacesNeeded)); 627 nextOutputPosition += spacesNeeded; 628 nextOutputPosition += fieldFormat.writeFieldValue(outputStream, nextOutputPosition, fieldValue, _options); 629 } 630 put(outputStream, '\n'); 631 } 632 } 633 634 /** Field types recognized and tracked by tsv-pretty processing. */ 635 enum FieldType { unknown, text, integer, floatingPoint, exponent }; 636 637 /** Field alignments used by tsv-pretty processing. */ 638 enum FieldAlignment { left, right }; 639 640 /** FieldFormat holds all the formatting info needed to format data values in a specific 641 * column. e.g. Field 1 may be text, field 2 may be a float, etc. This is calculated 642 * during the caching phase. Each FieldFormat instance is part of a vector representing 643 * the full row, so each includes the start position on the line and similar data. 644 * 645 * APIs used during the caching phase to gather field value samples 646 * - this - Initial construction. Takes the field index. 647 * - setHeader - Used to set the header text. 648 * - updateForFieldValue - Used to add the next field value sample. 649 * - finalizeFormatting - Used at the end of caching to finalize the format choices. 650 * 651 * APIs used after caching is finished (after finalizeFormatting): 652 * - startPosition - Returns the expected start position for the field. 653 * - endPosition - Returns the expected end position for the field. 654 * - writeHeader - Outputs the header, properly aligned. 655 * - writeFieldValue - Outputs the current field value, properly aligned. 656 */ 657 658 struct FieldFormat 659 { 660 private: 661 size_t _fieldIndex; // Zero-based index in the line 662 string _header = ""; // Original field header 663 size_t _headerPrintWidth = 0; 664 FieldType _type = FieldType.unknown; 665 FieldAlignment _alignment = FieldAlignment.left; 666 size_t _startPosition = 0; 667 size_t _printWidth = 0; 668 size_t _precision = 0; // Number of digits after the decimal point 669 670 /* These are used while doing initial type and print format detection. */ 671 size_t _minRawPrintWidth = 0; 672 size_t _maxRawPrintWidth = 0; 673 size_t _maxDigitsBeforeDecimal = 0; 674 size_t _maxDigitsAfterDecimal = 0; 675 size_t _maxSignificantDigits = 0; // Digits to include in exponential notation 676 677 public: 678 679 /** Initial construction. Takes a field index. */ 680 this(size_t fieldIndex) @safe pure nothrow @nogc 681 { 682 _fieldIndex = fieldIndex; 683 } 684 685 /** Sets the header text. */ 686 void setHeader(const char[] header) @safe 687 { 688 import std.conv : to; 689 690 _header = header.to!string; 691 _headerPrintWidth = _header.monospacePrintWidth; 692 } 693 694 /** Returns the expected start position for the field. */ 695 size_t startPosition() nothrow pure @safe @property 696 { 697 return _startPosition; 698 } 699 700 /** Returns the expected end position for the field. */ 701 size_t endPosition() nothrow pure @safe @property 702 { 703 return _startPosition + _printWidth; 704 } 705 706 /** Returns the type of field. */ 707 FieldType fieldType() nothrow pure @safe @property 708 { 709 return _type; 710 } 711 712 /** Writes the field header or underline characters to the output stream. 713 * 714 * The current output position should have been written up to the field's start position, 715 * including any spaces between fields. Unlike data fields, there is no need to correct 716 * for previous fields that have run long. This routine does not output trailing spaces. 717 * This makes it simpler for lines to avoid unnecessary trailing spaces. 718 * 719 * Underlines can either be written the full width of the field or the just under the 720 * text of the header. At present this is a template parameter (compile-time). 721 * 722 * The print width of the output is returned. 723 */ 724 size_t writeHeader (Flag!"writeUnderline" writeUnderline = No.writeUnderline, 725 Flag!"fullWidthUnderline" fullWidthUnderline = No.fullWidthUnderline) 726 (OutputRange!char outputStream, const ref TsvPrettyOptions options) 727 { 728 import std.range : repeat; 729 730 size_t positionsWritten = 0; 731 if (_headerPrintWidth > 0) 732 { 733 static if (writeUnderline) 734 { 735 static if (fullWidthUnderline) 736 { 737 put(outputStream, repeat("-", _printWidth)); 738 positionsWritten += _printWidth; 739 } 740 else // Underline beneath the header text only 741 { 742 if (_alignment == FieldAlignment.right) 743 { 744 put(outputStream, repeat(" ", _printWidth - _headerPrintWidth)); 745 positionsWritten += _printWidth - _headerPrintWidth; 746 } 747 put(outputStream, repeat("-", _headerPrintWidth)); 748 positionsWritten += _headerPrintWidth; 749 } 750 } 751 else 752 { 753 if (_alignment == FieldAlignment.right) 754 { 755 put(outputStream, repeat(" ", _printWidth - _headerPrintWidth)); 756 positionsWritten += _printWidth - _headerPrintWidth; 757 } 758 put(outputStream, _header); 759 positionsWritten += _headerPrintWidth; 760 } 761 } 762 return positionsWritten; 763 } 764 765 /** Writes the field value for the current column. 766 * 767 * The caller needs to generate output at least to the column's start position, but 768 * can go beyond if previous fields have run long. 769 * 770 * The field value is aligned properly in the field. Either left aligned (text) or 771 * right aligned (numeric). Floating point fields are both right aligned and 772 * decimal point aligned. The number of bytes written is returned. Trailing spaces 773 * are not added, the caller must add any necessary trailing spaces prior to 774 * printing the next field. 775 */ 776 size_t writeFieldValue(OutputRange!char outputStream, size_t currPosition, 777 const char[] fieldValue, in ref TsvPrettyOptions options) 778 in 779 { 780 assert(currPosition >= _startPosition); // Caller resposible for advancing to field start position. 781 assert(_type == FieldType.text || _type == FieldType.integer || 782 _type == FieldType.floatingPoint || _type == FieldType.exponent); 783 } 784 do 785 { 786 import std.algorithm : find, max, min; 787 import std.conv : to, ConvException; 788 import std.format : format; 789 790 /* Create the print version of the string. Either the raw value or a formatted 791 * version of a float. 792 */ 793 string printValue; 794 if (!options.formatFloats || _type == FieldType.text || _type == FieldType.integer) 795 { 796 printValue = fieldValue.to!string; 797 } 798 else 799 { 800 assert(options.formatFloats); 801 assert(_type == FieldType.exponent || _type == FieldType.floatingPoint); 802 803 if (_type == FieldType.exponent) 804 { 805 printValue = fieldValue.formatExponentValue(_precision); 806 } 807 else 808 { 809 printValue = fieldValue.formatFloatingPointValue(_precision); 810 } 811 } 812 813 if (printValue.length == 0 && options.replaceEmpty) printValue = options.emptyReplacement; 814 size_t printValuePrintWidth = printValue.monospacePrintWidth; 815 816 /* Calculate leading spaces needed for right alignment. */ 817 size_t leadingSpaces = 0; 818 if (_alignment == FieldAlignment.right) 819 { 820 /* Target width adjusts the column width to account for overrun by the previous field. */ 821 size_t targetWidth; 822 if (currPosition == _startPosition) 823 { 824 targetWidth = _printWidth; 825 } 826 else 827 { 828 size_t startGap = currPosition - _startPosition; 829 targetWidth = max(printValuePrintWidth, 830 startGap < _printWidth ? _printWidth - startGap : 0); 831 } 832 833 leadingSpaces = (printValuePrintWidth < targetWidth) ? 834 targetWidth - printValuePrintWidth : 0; 835 836 /* The above calculation assumes the print value is fully right aligned. 837 * This is not correct when raw value floats are being used rather than 838 * formatted floats, as different values will have different precision. 839 * The next adjustment accounts for this, dropping leading spaces as 840 * needed to align the decimal point. Note that text and exponential 841 * values get aligned strictly against right boundaries. 842 */ 843 if (leadingSpaces > 0 && _precision > 0 && 844 _type == FieldType.floatingPoint && !options.formatFloats) 845 { 846 import std.algorithm : canFind, findSplit; 847 import std.string : isNumeric; 848 849 if (printValue.isNumeric && !printValue.canFind!(x => x == 'e' || x == 'E')) 850 { 851 size_t decimalAndDigitsLength = printValue.find(".").length; 852 size_t trailingSpaces = 853 (decimalAndDigitsLength == 0) ? _precision + 1 : 854 (decimalAndDigitsLength > _precision) ? 0 : 855 _precision + 1 - decimalAndDigitsLength; 856 857 leadingSpaces = (leadingSpaces > trailingSpaces) ? 858 leadingSpaces - trailingSpaces : 0; 859 } 860 } 861 } 862 put(outputStream, repeat(' ', leadingSpaces)); 863 put(outputStream, printValue); 864 return printValuePrintWidth + leadingSpaces; 865 } 866 867 /** Updates type and format given a new field value. 868 * 869 * This is called during look-ahead caching to register a new sample value for the 870 * column. The key components updates are field type and print width. 871 */ 872 void updateForFieldValue(const char[] fieldValue, const ref TsvPrettyOptions options) @safe 873 { 874 import std.algorithm : findAmong, findSplit, max, min; 875 import std.conv : to, ConvException; 876 import std.string : isNumeric; 877 878 size_t fieldValuePrintWidth = fieldValue.monospacePrintWidth; 879 size_t fieldValuePrintWidthWithEmpty = 880 (fieldValuePrintWidth == 0 && options.replaceEmpty) ? 881 options.emptyReplacementPrintWidth : 882 fieldValuePrintWidth; 883 884 _maxRawPrintWidth = max(_maxRawPrintWidth, fieldValuePrintWidthWithEmpty); 885 _minRawPrintWidth = (_minRawPrintWidth == 0) ? 886 fieldValuePrintWidthWithEmpty : 887 min(_minRawPrintWidth, fieldValuePrintWidthWithEmpty); 888 889 if (_type == FieldType.text) 890 { 891 /* Already text, can't become anything else. */ 892 } 893 else if (fieldValuePrintWidth == 0) 894 { 895 /* Don't let an empty field override a numeric field type. */ 896 } 897 else if (!fieldValue.isNumeric) 898 { 899 /* Not parsable as a number. Switch from unknown or numeric type to text. */ 900 _type = FieldType.text; 901 } 902 else 903 { 904 /* Field type is currently unknown or numeric, and current field parses as numeric. 905 * See if it parses as integer or float. Integers will parse as floats, so try 906 * integer types first. 907 */ 908 FieldType parsesAs = FieldType.unknown; 909 long longValue; 910 ulong ulongValue; 911 double doubleValue; 912 try 913 { 914 longValue = fieldValue.to!long; 915 parsesAs = FieldType.integer; 916 } 917 catch (ConvException) 918 { 919 try 920 { 921 ulongValue = fieldValue.to!ulong; 922 parsesAs = FieldType.integer; 923 } 924 catch (ConvException) 925 { 926 try 927 { 928 doubleValue = fieldValue.to!double; 929 import std.algorithm : findAmong; 930 parsesAs = (fieldValue.findAmong("eE").length == 0) ? 931 FieldType.floatingPoint : FieldType.exponent; 932 } 933 catch (ConvException) 934 { 935 /* Note: This means isNumeric thinks it's a number, but conversions all failed. */ 936 parsesAs = FieldType.text; 937 } 938 } 939 } 940 941 if (parsesAs == FieldType.text) 942 { 943 /* Not parsable as a number (despite isNumeric result). Switch to text type. */ 944 _type = FieldType.text; 945 } 946 else if (parsesAs == FieldType.exponent) 947 { 948 /* Exponential notion supersedes both vanilla floats and integers. */ 949 _type = FieldType.exponent; 950 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits); 951 952 if (auto decimalSplit = fieldValue.findSplit(".")) 953 { 954 auto fromExponent = decimalSplit[2].findAmong("eE"); 955 size_t numDigitsAfterDecimal = decimalSplit[2].length - fromExponent.length; 956 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length); 957 _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, numDigitsAfterDecimal); 958 } 959 else 960 { 961 /* Exponent without a decimal point. */ 962 auto fromExponent = fieldValue.findAmong("eE"); 963 assert(fromExponent.length > 0); 964 size_t numDigits = fieldValue.length - fromExponent.length; 965 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, numDigits); 966 } 967 } 968 else if (parsesAs == FieldType.floatingPoint) 969 { 970 /* Floating point supercedes integer but not exponential. */ 971 if (_type != FieldType.exponent) _type = FieldType.floatingPoint; 972 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits); 973 974 if (auto decimalSplit = fieldValue.findSplit(".")) 975 { 976 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, decimalSplit[0].length); 977 _maxDigitsAfterDecimal = max(_maxDigitsAfterDecimal, decimalSplit[2].length); 978 } 979 } 980 else 981 { 982 assert(parsesAs == FieldType.integer); 983 if (_type != FieldType.floatingPoint) _type = FieldType.integer; 984 _maxSignificantDigits = max(_maxSignificantDigits, fieldValue.significantDigits); 985 _maxDigitsBeforeDecimal = max(_maxDigitsBeforeDecimal, fieldValue.length); 986 } 987 } 988 } 989 990 /** Updates field formatting info based on the current state. It is expected to be 991 * called after adding field entries via updateForFieldValue(). It returns its new 992 * end position. 993 */ 994 size_t finalizeFormatting (size_t startPosition, const ref TsvPrettyOptions options) @safe pure @nogc nothrow 995 { 996 import std.algorithm : max, min; 997 _startPosition = startPosition; 998 if (_type == FieldType.unknown) _type = FieldType.text; 999 _alignment = (_type == FieldType.integer || _type == FieldType.floatingPoint 1000 || _type == FieldType.exponent) ? 1001 FieldAlignment.right : 1002 FieldAlignment.left; 1003 1004 if (_type == FieldType.floatingPoint) 1005 { 1006 size_t precision = min(options.floatPrecision, _maxDigitsAfterDecimal); 1007 size_t maxValueWidth = _maxDigitsBeforeDecimal + precision; 1008 if (precision > 0) maxValueWidth++; // Account for the decimal point. 1009 _printWidth = max(1, _headerPrintWidth, maxValueWidth); 1010 _precision = precision; 1011 } 1012 else if (_type == FieldType.exponent) 1013 { 1014 size_t maxPrecision = (_maxSignificantDigits > 0) ? _maxSignificantDigits - 1 : 0; 1015 _precision = min(options.floatPrecision, maxPrecision); 1016 1017 size_t maxValuePrintWidth = !options.formatFloats ? _maxRawPrintWidth : _precision + 7; 1018 _printWidth = max(1, _headerPrintWidth, maxValuePrintWidth); 1019 } 1020 else if (_type == FieldType.integer) 1021 { 1022 _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, _maxRawPrintWidth); 1023 _precision = 0; 1024 } 1025 else 1026 { 1027 _printWidth = max(1, _headerPrintWidth, _minRawPrintWidth, 1028 min(options.maxFieldPrintWidth, _maxRawPrintWidth)); 1029 _precision = 0; 1030 } 1031 1032 return _startPosition + _printWidth; 1033 } 1034 } 1035 1036 /** formatFloatingPointValue returns the printed representation of a raw value 1037 * formatted as a fixed precision floating number. This includes zero padding or 1038 * truncation of trailing digits as necessary to meet the desired precision. 1039 * 1040 * If the value cannot be interpreted as a double then the raw value is returned. 1041 * Similarly, values in exponential notion are returned without reformatting. 1042 * 1043 * This routine is used to format values in columns identified as floating point. 1044 */ 1045 string formatFloatingPointValue(const char[] value, size_t precision) @safe 1046 { 1047 import std.algorithm : canFind, find; 1048 import std.array : join; 1049 import std.conv : to, ConvException; 1050 import std.format : format; 1051 import std.math : isFinite; 1052 import std.range : repeat; 1053 1054 string printValue; 1055 1056 if (value.canFind!(x => x == 'e' || x == 'E')) 1057 { 1058 /* Exponential notion. Use the raw value. */ 1059 printValue = value.to!string; 1060 } 1061 else 1062 { 1063 try 1064 { 1065 double doubleValue = value.to!double; 1066 if (doubleValue.isFinite) 1067 { 1068 size_t numPrecisionDigits = value.precisionDigits; 1069 if (numPrecisionDigits >= precision) 1070 { 1071 printValue = format("%.*f", precision, doubleValue); 1072 } 1073 else if (numPrecisionDigits == 0) 1074 { 1075 printValue = format("%.*f", numPrecisionDigits, doubleValue) ~ "." ~ repeat("0", precision).join; 1076 } 1077 else 1078 { 1079 printValue = format("%.*f", numPrecisionDigits, doubleValue) ~ repeat("0", precision - numPrecisionDigits).join; 1080 } 1081 } 1082 else printValue = value.to!string; // NaN or Infinity 1083 } 1084 catch (ConvException) printValue = value.to!string; 1085 } 1086 return printValue; 1087 } 1088 1089 @safe unittest 1090 { 1091 assert("".formatFloatingPointValue(3) == ""); 1092 assert(" ".formatFloatingPointValue(3) == " "); 1093 assert("abc".formatFloatingPointValue(3) == "abc"); 1094 assert("nan".formatFloatingPointValue(3) == "nan"); 1095 assert("0".formatFloatingPointValue(0) == "0"); 1096 assert("1".formatFloatingPointValue(0) == "1"); 1097 assert("1.".formatFloatingPointValue(0) == "1"); 1098 assert("1".formatFloatingPointValue(3) == "1.000"); 1099 assert("1000".formatFloatingPointValue(3) == "1000.000"); 1100 assert("1000.001".formatFloatingPointValue(5) == "1000.00100"); 1101 assert("1000.001".formatFloatingPointValue(3) == "1000.001"); 1102 assert("1000.001".formatFloatingPointValue(2) == "1000.00"); 1103 assert("1000.006".formatFloatingPointValue(2) == "1000.01"); 1104 assert("-0.1".formatFloatingPointValue(1) == "-0.1"); 1105 assert("-0.1".formatFloatingPointValue(3) == "-0.100"); 1106 assert("-0.001".formatFloatingPointValue(3) == "-0.001"); 1107 assert("-0.006".formatFloatingPointValue(2) == "-0.01"); 1108 assert("-0.001".formatFloatingPointValue(1) == "-0.0"); 1109 assert("-0.001".formatFloatingPointValue(0) == "-0"); 1110 assert("0e+00".formatFloatingPointValue(0) == "0e+00"); 1111 assert("0.00e+00".formatFloatingPointValue(0) == "0.00e+00"); 1112 assert("1e+06".formatFloatingPointValue(1) == "1e+06"); 1113 assert("1e+06".formatFloatingPointValue(2) == "1e+06"); 1114 assert("1E-06".formatFloatingPointValue(1) == "1E-06"); 1115 assert("1.1E+6".formatFloatingPointValue(2) == "1.1E+6"); 1116 assert("1.1E+100".formatFloatingPointValue(2) == "1.1E+100"); 1117 } 1118 1119 /** formatExponentValue returns the printed representation of a raw value formatted 1120 * using exponential notation and a specific precision. If the value cannot be interpreted 1121 * as a double then the a copy of the original value is returned. 1122 * 1123 * This routine is used to format values in columns identified as having exponent format. 1124 */ 1125 string formatExponentValue(const char[] value, size_t precision) @safe 1126 { 1127 import std.algorithm : canFind, find, findSplit; 1128 import std.array : join; 1129 import std.conv : to, ConvException; 1130 import std.format : format; 1131 import std.math : isFinite; 1132 import std.range : repeat; 1133 1134 string printValue; 1135 try 1136 { 1137 double doubleValue = value.to!double; 1138 if (doubleValue.isFinite) 1139 { 1140 size_t numSignificantDigits = value.significantDigits; 1141 size_t numPrecisionDigits = (numSignificantDigits == 0) ? 0 : numSignificantDigits - 1; 1142 if (numPrecisionDigits >= precision) 1143 { 1144 printValue = format("%.*e", precision, doubleValue); 1145 } 1146 else 1147 { 1148 string unpaddedPrintValue = format("%.*e", numPrecisionDigits, doubleValue); 1149 auto exponentSplit = unpaddedPrintValue.findSplit("e"); // Uses the same exponent case as format call. 1150 if (numPrecisionDigits == 0) 1151 { 1152 assert(precision != 0); 1153 assert(!exponentSplit[0].canFind(".")); 1154 printValue = exponentSplit[0] ~ "." ~ repeat("0", precision).join ~ exponentSplit[1] ~ exponentSplit[2]; 1155 } 1156 else 1157 { 1158 printValue = exponentSplit[0] ~ repeat("0", precision - numPrecisionDigits).join ~ exponentSplit[1] ~ exponentSplit[2]; 1159 } 1160 } 1161 } 1162 else printValue = value.to!string; // NaN or Infinity 1163 } 1164 catch (ConvException) printValue = value.to!string; 1165 1166 return printValue; 1167 } 1168 1169 @safe unittest 1170 { 1171 assert("".formatExponentValue(3) == ""); 1172 assert(" ".formatExponentValue(3) == " "); 1173 assert("abc".formatExponentValue(3) == "abc"); 1174 assert("nan".formatExponentValue(3) == "nan"); 1175 assert("0".formatExponentValue(0) == "0e+00"); 1176 assert("1".formatExponentValue(0) == "1e+00"); 1177 assert("1.".formatExponentValue(0) == "1e+00"); 1178 assert("1".formatExponentValue(3) == "1.000e+00"); 1179 assert("1000".formatExponentValue(3) == "1.000e+03"); 1180 assert("1000.001".formatExponentValue(5) == "1.00000e+03"); 1181 assert("1000.001".formatExponentValue(3) == "1.000e+03"); 1182 assert("1000.001".formatExponentValue(6) == "1.000001e+03"); 1183 assert("1000.006".formatExponentValue(5) == "1.00001e+03"); 1184 assert("-0.1".formatExponentValue(1) == "-1.0e-01"); 1185 assert("-0.1".formatExponentValue(3) == "-1.000e-01"); 1186 assert("-0.001".formatExponentValue(3) == "-1.000e-03"); 1187 assert("-0.001".formatExponentValue(1) == "-1.0e-03"); 1188 assert("-0.001".formatExponentValue(0) == "-1e-03"); 1189 assert("0e+00".formatExponentValue(0) == "0e+00"); 1190 assert("0.00e+00".formatExponentValue(0) == "0e+00"); 1191 assert("1e+06".formatExponentValue(1) == "1.0e+06"); 1192 assert("1e+06".formatExponentValue(2) == "1.00e+06"); 1193 assert("1.0001e+06".formatExponentValue(1) == "1.0e+06"); 1194 assert("1.0001e+06".formatExponentValue(5) == "1.00010e+06"); 1195 } 1196 1197 /** Returns the number of significant digits in a numeric string. 1198 * 1199 * Significant digits are those needed to represent a number in exponential notation. 1200 * Examples: 1201 * 22.345 - 5 digits 1202 * 10.010 - 4 digits 1203 * 0.0032 - 2 digits 1204 */ 1205 size_t significantDigits(const char[] numericString) @safe pure 1206 { 1207 import std.algorithm : canFind, find, findAmong, findSplit, stripRight; 1208 import std.ascii : isDigit; 1209 import std.math : isFinite; 1210 import std.string : isNumeric; 1211 import std.conv : to; 1212 1213 assert (numericString.isNumeric); 1214 1215 size_t significantDigits = 0; 1216 if (numericString.to!double.isFinite) 1217 { 1218 auto digitsPart = numericString.find!(x => x.isDigit && x != '0'); 1219 auto exponentPart = digitsPart.findAmong("eE"); 1220 digitsPart = digitsPart[0 .. $ - exponentPart.length]; 1221 1222 if (digitsPart.canFind('.')) 1223 { 1224 digitsPart = digitsPart.stripRight('0'); 1225 significantDigits = digitsPart.length - 1; 1226 } 1227 else 1228 { 1229 significantDigits = digitsPart.length; 1230 } 1231 1232 if (significantDigits == 0) significantDigits = 1; 1233 } 1234 1235 return significantDigits; 1236 } 1237 1238 @safe pure unittest 1239 { 1240 assert("0".significantDigits == 1); 1241 assert("10".significantDigits == 2); 1242 assert("0.0".significantDigits == 1); 1243 assert("-10.0".significantDigits == 2); 1244 assert("-.01".significantDigits == 1); 1245 assert("-.5401".significantDigits == 4); 1246 assert("1010.010".significantDigits == 6); 1247 assert("0.0003003".significantDigits == 4); 1248 assert("6e+06".significantDigits == 1); 1249 assert("6.0e+06".significantDigits == 1); 1250 assert("6.5e+06".significantDigits == 2); 1251 assert("6.005e+06".significantDigits == 4); 1252 } 1253 1254 /** Returns the number of digits to the right of the decimal point in a numeric string. 1255 * This routine includes trailing zeros in the count. 1256 */ 1257 size_t precisionDigits(const char[] numericString) @safe pure 1258 { 1259 import std.algorithm : canFind, find, findAmong, findSplit, stripRight; 1260 import std.ascii : isDigit; 1261 import std.math : isFinite; 1262 import std.string : isNumeric; 1263 import std.conv : to; 1264 1265 assert (numericString.isNumeric); 1266 1267 size_t precisionDigits = 0; 1268 if (numericString.to!double.isFinite) 1269 { 1270 if (auto decimalSplit = numericString.findSplit(".")) 1271 { 1272 auto exponentPart = decimalSplit[2].findAmong("eE"); 1273 precisionDigits = decimalSplit[2].length - exponentPart.length; 1274 } 1275 } 1276 1277 return precisionDigits; 1278 } 1279 1280 @safe pure unittest 1281 { 1282 assert("0".precisionDigits == 0); 1283 assert("10".precisionDigits == 0); 1284 assert("0.0".precisionDigits == 1); 1285 assert("-10.0".precisionDigits == 1); 1286 assert("-.01".precisionDigits == 2); 1287 assert("-.5401".precisionDigits == 4); 1288 } 1289 1290 /** Calculates the expected print width of a string in monospace (fixed-width) fonts. 1291 */ 1292 size_t monospacePrintWidth(const char[] str) @safe nothrow 1293 { 1294 bool isCJK(dchar c) 1295 { 1296 return c >= '\u3000' && c <= '\u9fff'; 1297 } 1298 1299 import std.uni : byGrapheme; 1300 1301 size_t width = 0; 1302 try foreach (g; str.byGrapheme) width += isCJK(g[0]) ? 2 : 1; 1303 catch (Exception) width = str.length; // Invalid utf-8 sequence. Catch avoids program failure. 1304 1305 return width; 1306 } 1307 1308 unittest 1309 { 1310 assert("".monospacePrintWidth == 0); 1311 assert(" ".monospacePrintWidth == 1); 1312 assert("abc".monospacePrintWidth == 3); 1313 assert("林檎".monospacePrintWidth == 4); 1314 assert("æble".monospacePrintWidth == 4); 1315 assert("ვაშლი".monospacePrintWidth == 5); 1316 assert("größten".monospacePrintWidth == 7); 1317 }