1 /** 2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange, 3 and a several others. 4 5 Utilities in this file: 6 $(LIST 7 * [InputFieldReordering] - A class that creates a reordered subset of fields from 8 an input line. Fields in the subset are accessed by array indicies. This is 9 especially useful when processing the subset in a specific order, such as the 10 order listed on the command-line at run-time. 11 12 * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer 13 output. Intended for use with stdout, it is a significant performance benefit. 14 15 * [bufferedByLine] - An input range that reads from a File handle line by line. 16 It is similar to the standard library method std.stdio.File.byLine, but quite a 17 bit faster. This is achieved by reading in larger blocks and buffering. 18 19 * [joinAppend] - A function that performs a join, but appending the join output to 20 an output stream. It is a performance improvement over using join or joiner with 21 writeln. 22 23 * [getTsvFieldValue] - A convenience function when only a single value is needed from 24 an input line. 25 26 * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for 27 parsing field-lists entered on the command line. 28 29 * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detecting 30 Windows newlines in input. 31 ) 32 33 Copyright (c) 2015-2020, eBay Inc. 34 Initially written by Jon Degenhardt 35 36 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt) 37 */ 38 39 module tsv_utils.common.utils; 40 41 import std.range; 42 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned; 43 import std.typecons : Flag, No, Yes; 44 45 // InputFieldReording class. 46 47 /** Flag used by the InputFieldReordering template. 
*/
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;      // One slot per output position; slots alias caller data.
    private TupleFromTo[] fromToMap;    // Full input-index -> output-slot map, sorted by input index.
    private TupleFromTo[] mapStack;     // Unprocessed tail of fromToMap for the current line.

    /** Builds the input-to-output map.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = Output slot assigned to the first listed field. Subsequent fields take
     *           consecutive slots. The first `start` slots of outputFields are never
     *           written by this class.
     *
     * The output buffer is sized `start + inputFieldIndicies.length` so that every
     * mapped slot exists. (Sizing it by the number of listed fields alone made any
     * non-zero `start` index past the end of the buffer in processNextField.)
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Tuples compare field by field, so this orders by input index first. */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* Reset every slot so fields missing from a short line read as empty. */
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the
     *  outputFields array.
     *
     *  processNextField should be called once for each field on the line, in the order
     *  found. The processing of the line can terminate once allFieldsFilled returns
     *  true.
     *
     *  The return value is the number of output fields the input field maps to. Zero
     *  means the field is not mapped to the output fields array.
     *
     *  If, prior to allFieldsFilled returning true, any fields on the input line
     *  are not passed to processNextField, the caller should either ensure the fields
     *  are not part of the output fields or have partial lines enabled.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;
        /* The map is sorted by input index, so all entries for this field are adjacent
         * at the front of the remaining map.
         */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     *  it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}

/* Tests using different character types.
*/
@safe unittest
{
    import std.conv : to;

    /* Three input lines of four fields each. The second line contains non-ASCII text. */
    auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"],
                       ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                       ["r3f0", "123", "456", "789"]];

    /* Output order: input field 2 first, then input field 0. */
    size_t[] fields_2_0 = [2, 0];

    auto expected_2_0 = [["r1f2", "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456", "r3f0"]];

    /* The same expectations converted to each character width under test.
     * Note: dstringExpected_2_0 is not referenced by the assertions below.
     */
    char[][][]  charExpected_2_0 = to!(char[][][])(expected_2_0);
    wchar[][][] wcharExpected_2_0 = to!(wchar[][][])(expected_2_0);
    dchar[][][] dcharExpected_2_0 = to!(dchar[][][])(expected_2_0);
    dstring[][] dstringExpected_2_0 = to!(dstring[][])(expected_2_0);

    auto charIFR  = new InputFieldReordering!char(fields_2_0);
    auto wcharIFR = new InputFieldReordering!wchar(fields_2_0);
    auto dcharIFR = new InputFieldReordering!dchar(fields_2_0);

    foreach (lineIndex, line; inputLines)
    {
        charIFR.initNewLine;
        wcharIFR.initNewLine;
        dcharIFR.initNewLine;

        foreach (fieldIndex, fieldValue; line)
        {
            charIFR.processNextField(fieldIndex, to!(char[])(fieldValue));
            wcharIFR.processNextField(fieldIndex, to!(wchar[])(fieldValue));
            dcharIFR.processNextField(fieldIndex, to!(dchar[])(fieldValue));

            /* Field 2 is the highest index requested, so the output is complete
             * exactly from field 2 onward.
             */
            assert ((fieldIndex >= 2) == charIFR.allFieldsFilled);
            assert ((fieldIndex >= 2) == wcharIFR.allFieldsFilled);
            assert ((fieldIndex >= 2) == dcharIFR.allFieldsFilled);
        }
        assert(charIFR.allFieldsFilled);
        assert(wcharIFR.allFieldsFilled);
        assert(dcharIFR.allFieldsFilled);

        assert(charIFR.outputFields == charExpected_2_0[lineIndex]);
        assert(wcharIFR.outputFields == wcharExpected_2_0[lineIndex]);
        assert(dcharIFR.outputFields == dcharExpected_2_0[lineIndex]);
    }
}

/* Test of partial line support.
*/
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"],
                       ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                       ["r3f0", "123", "456", "789"]];

    /* Output order: input field 2 first, then input field 0. */
    size_t[] fields_2_0 = [2, 0];

    // The expected states of the output field while each line and field are processed.
    // Indexed as [line][field just processed]; unfilled slots are empty strings.
    auto expectedBylineByfield_2_0 =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"], ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"], ["456", "r3f0"]],
        ];

    char[][][][] charExpectedBylineByfield_2_0 = to!(char[][][][])(expectedBylineByfield_2_0);

    auto charIFR = new InputFieldReordering!(char, EnablePartialLines.yes)(fields_2_0);

    foreach (lineIndex, line; inputLines)
    {
        charIFR.initNewLine;
        foreach (fieldIndex, fieldValue; line)
        {
            /* With partial lines enabled the output buffer is checked after every
             * field, not only once all fields are filled.
             */
            charIFR.processNextField(fieldIndex, to!(char[])(fieldValue));
            assert(charIFR.outputFields == charExpectedBylineByfield_2_0[lineIndex][fieldIndex]);
        }
    }
}

/* Field combination tests.
*/
@safe unittest
{
    import std.conv : to;
    import std.stdio;

    /* Each value encodes its position as <line><field>. */
    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* Field selections: single fields, ordered and reversed pairs, all fields in
     * both directions, and a list with a repeated field.
     */
    size_t[] fields_0 = [0];
    size_t[] fields_3 = [3];
    size_t[] fields_01 = [0, 1];
    size_t[] fields_10 = [1, 0];
    size_t[] fields_03 = [0, 3];
    size_t[] fields_30 = [3, 0];
    size_t[] fields_0123 = [0, 1, 2, 3];
    size_t[] fields_3210 = [3, 2, 1, 0];
    size_t[] fields_03001 = [0, 3, 0, 0, 1];

    /* Expected output per input line for each selection above. */
    auto expected_0 = to!(char[][][])([["00"],
                                       ["10"],
                                       ["20"]]);

    auto expected_3 = to!(char[][][])([["03"],
                                       ["13"],
                                       ["23"]]);

    auto expected_01 = to!(char[][][])([["00", "01"],
                                        ["10", "11"],
                                        ["20", "21"]]);

    auto expected_10 = to!(char[][][])([["01", "00"],
                                        ["11", "10"],
                                        ["21", "20"]]);

    auto expected_03 = to!(char[][][])([["00", "03"],
                                        ["10", "13"],
                                        ["20", "23"]]);

    auto expected_30 = to!(char[][][])([["03", "00"],
                                        ["13", "10"],
                                        ["23", "20"]]);

    auto expected_0123 = to!(char[][][])([["00", "01", "02", "03"],
                                          ["10", "11", "12", "13"],
                                          ["20", "21", "22", "23"]]);

    auto expected_3210 = to!(char[][][])([["03", "02", "01", "00"],
                                          ["13", "12", "11", "10"],
                                          ["23", "22", "21", "20"]]);

    auto expected_03001 = to!(char[][][])([["00", "03", "00", "00", "01"],
                                           ["10", "13", "10", "10", "11"],
                                           ["20", "23", "20", "20", "21"]]);

    auto ifr_0 = new InputFieldReordering!char(fields_0);
    auto ifr_3 = new InputFieldReordering!char(fields_3);
    auto ifr_01 = new InputFieldReordering!char(fields_01);
    auto ifr_10 = new InputFieldReordering!char(fields_10);
    auto ifr_03 = new InputFieldReordering!char(fields_03);
    auto ifr_30 = new InputFieldReordering!char(fields_30);
    auto ifr_0123 = new InputFieldReordering!char(fields_0123);
    auto ifr_3210 = new InputFieldReordering!char(fields_3210);
    auto ifr_03001 = new InputFieldReordering!char(fields_03001);

    foreach (lineIndex, line; inputLines)
    {
        ifr_0.initNewLine;
        ifr_3.initNewLine;
        ifr_01.initNewLine;
        ifr_10.initNewLine;
        ifr_03.initNewLine;
        ifr_30.initNewLine;
        ifr_0123.initNewLine;
        ifr_3210.initNewLine;
        ifr_03001.initNewLine;

        /* Every field of the line is fed to every reordering; no early exit. */
        foreach (fieldIndex, fieldValue; line)
        {
            ifr_0.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_3.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_01.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_10.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_03.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_30.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_0123.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_3210.processNextField(fieldIndex, to!(char[])(fieldValue));
            ifr_03001.processNextField(fieldIndex, to!(char[])(fieldValue));
        }

        assert(ifr_0.outputFields == expected_0[lineIndex]);
        assert(ifr_3.outputFields == expected_3[lineIndex]);
        assert(ifr_01.outputFields == expected_01[lineIndex]);
        assert(ifr_10.outputFields == expected_10[lineIndex]);
        assert(ifr_03.outputFields == expected_03[lineIndex]);
        assert(ifr_30.outputFields == expected_30[lineIndex]);
        assert(ifr_0123.outputFields == expected_0123[lineIndex]);
        assert(ifr_3210.outputFields == expected_3210[lineIndex]);
        assert(ifr_03001.outputFields == expected_03001[lineIndex]);
    }
}


import std.stdio : File, isFileHandle, KeepTerminator;
import std.range : isOutputRange;
import std.traits : Unqual;

/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case.
BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.


BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered writer on top of outputTarget.
     *
     * Params:
     *   outputTarget = File or output range receiving the buffered data.
     *   flushSize = Buffer length at which newline-terminated writes are flushed.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which any write is flushed. Expected to be at
     *             least flushSize; raised to flushSize if it is smaller.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    @safe
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /* Buffered data is written out when the object is destroyed. */
    ~this() @safe
    {
        flush();
    }

    /** Writes the internal buffer to the output target and empties the buffer.
     *
     * Nothing is written when the buffer is empty. This keeps the destructor from
     * touching an output target that was never opened (e.g. a default-initialized
     * instance) when there is no data to write.
     */
    void flush() @safe
    {
        if (_outputBuffer.data.length == 0) return;

        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if flushSize was reached. */
    bool flushIfFull() @safe
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize() @safe
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. The explicit length check matters when
     * flushSize is zero: without it the trailing-newline test would index into an
     * empty buffer.
     */
    private bool maybeFlush() @safe
    {
        immutable size_t bufferedLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferedLength > 0 &&
            bufferedLength >= _flushSize &&
            (_outputBuffer.data[$-1] == '\n' || bufferedLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* Appends to the internal buffer without any flush check. */
    private void appendRaw(T)(T stuff) pure @safe
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends stuff to the buffer, flushing per the maybeFlush rules. */
    void append(T)(T stuff) @safe
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     *  Returns true if flushSize was reached.
     */
    bool appendln() @safe
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if flushSize has been reached.
     *  Returns true if flushSize was reached.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range. */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            /* A lone newline is a line boundary; other characters are only buffered. */
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}

unittest
{
    import tsv_utils.common.unittest_utils;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : map, joiner;
    import std.range : iota;
    import std.conv : to;

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        ostream.append("file1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln(100.to!string);
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and a flush at every line (flushSize is zero). */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        ostream.append("file2: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer.
Requires version 2.078.0 623 See: https://issues.dlang.org/show_bug.cgi?id=9661 624 */ 625 static if (__VERSION__ >= 2078) 626 { 627 string filepath3 = buildPath(testDir, "file3.txt"); 628 { 629 import std.stdio : File; 630 631 auto ltw = filepath3.File("w").lockingTextWriter; 632 { 633 auto ostream = BufferedOutputRange!(typeof(ltw))(ltw); 634 ostream.append("file3: "); 635 ostream.append("abc"); 636 ostream.append(["def", "ghi", "jkl"]); 637 ostream.appendln("100"); 638 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 639 ostream.appendln(); 640 } 641 } 642 assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 643 } 644 645 /* With an Appender. */ 646 import std.array : appender; 647 auto app1 = appender!(char[]); 648 { 649 auto ostream = BufferedOutputRange!(typeof(app1))(app1); 650 ostream.append("appender1: "); 651 ostream.append("abc"); 652 ostream.append(["def", "ghi", "jkl"]); 653 ostream.appendln("100"); 654 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 655 ostream.appendln(); 656 } 657 assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 658 659 /* With an Appender, but checking flush boundaries. */ 660 auto app2 = appender!(char[]); 661 { 662 auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+ 663 bool wasFlushed = false; 664 665 assert(app2.data == ""); 666 667 ostream.append("12345678"); // Not flushed yet. 668 assert(app2.data == ""); 669 670 wasFlushed = ostream.appendln; // Nineth char, not flushed yet. 671 assert(!wasFlushed); 672 assert(app2.data == ""); 673 674 wasFlushed = ostream.appendln; // Tenth char, now flushed. 
675 assert(wasFlushed); 676 assert(app2.data == "12345678\n\n"); 677 678 app2.clear; 679 assert(app2.data == ""); 680 681 ostream.append("12345678"); 682 683 wasFlushed = ostream.flushIfFull; 684 assert(!wasFlushed); 685 assert(app2.data == ""); 686 687 ostream.flush; 688 assert(app2.data == "12345678"); 689 690 app2.clear; 691 assert(app2.data == ""); 692 693 ostream.append("123456789012345"); 694 assert(app2.data == ""); 695 } 696 assert(app2.data == "123456789012345"); 697 698 /* Using joinAppend. */ 699 auto app1b = appender!(char[]); 700 { 701 auto ostream = BufferedOutputRange!(typeof(app1b))(app1b); 702 ostream.append("appenderB: "); 703 ostream.joinAppend(["a", "bc", "def"], '-'); 704 ostream.append(':'); 705 ostream.joinAppend(["g", "hi", "jkl"], '-'); 706 ostream.appendln("*100*"); 707 ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' '); 708 ostream.append(' '); 709 ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " "); 710 ostream.appendln(); 711 } 712 assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n", 713 "app1b.data: |" ~app1b.data ~ "|"); 714 715 /* Operating as an output range. When passed to a function as a ref, exiting 716 * the function does not flush. When passed as a value, it get flushed when 717 * the function returns. Also test both UCFS and non-UFCS styles. 
718 */ 719 720 void outputStuffAsRef(T)(ref T range) 721 if (isOutputRange!(T, char)) 722 { 723 range.put('1'); 724 put(range, "23"); 725 range.put('\n'); 726 range.put(["5", "67"]); 727 put(range, iota(8, 10).map!(x => x.to!string)); 728 put(range, "\n"); 729 } 730 731 void outputStuffAsVal(T)(T range) 732 if (isOutputRange!(T, char)) 733 { 734 put(range, '1'); 735 range.put("23"); 736 put(range, '\n'); 737 put(range, ["5", "67"]); 738 range.put(iota(8, 10).map!(x => x.to!string)); 739 range.put("\n"); 740 } 741 742 auto app3 = appender!(char[]); 743 { 744 auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0); 745 outputStuffAsRef(ostream); 746 assert(app3.data == "", "app3.data: |" ~app3.data ~ "|"); 747 outputStuffAsRef(ostream); 748 assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|"); 749 } 750 assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|"); 751 752 auto app4 = appender!(char[]); 753 { 754 auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0); 755 outputStuffAsVal(ostream); 756 assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 757 outputStuffAsVal(ostream); 758 assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 759 } 760 assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 761 762 /* Test maxSize. */ 763 auto app5 = appender!(char[]); 764 { 765 auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10 766 assert(app5.data == ""); 767 768 ostream.append("1234567"); // Not flushed yet (no newline). 769 assert(app5.data == ""); 770 771 ostream.append("89012"); // Flushed by maxSize 772 assert(app5.data == "123456789012"); 773 774 ostream.put("1234567"); // Not flushed yet (no newline). 
assert(app5.data == "123456789012");

        ostream.put("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}

/**
bufferedByLine is a faster alternative to std.stdio.File.byLine. Rather than reading
one line at a time, it reads large blocks from the input stream and hands out lines
as slices of its internal buffer.

The file argument must be a File open for reading, typically a filesystem file or
standard input. Pass Yes.keepTerminator as the template parameter to keep the newline
on each line. This matches stdio.File.byLine, except that the choice is a template
parameter rather than a runtime parameter.

Because input is read in blocks, nothing is returned until a full buffer is available
or end-of-file is reached. bufferedByLine is therefore not appropriate for interactive
input.
*/

auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer bookkeeping:
         *  - _buffer.length - Allocated size of the buffer.
         *  - _dataEnd - One past the last valid byte (end of the most recent read).
         *  - _lineStart - First byte of the current line.
         *  - _lineEnd - One past the last byte of the current line.
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        this (File f) @safe
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        /* The range is exhausted once the file is at EOF and the buffer is consumed. */
        bool empty() const pure @safe
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        /* The returned slice refers to the internal buffer. */
        Char[] front() pure @safe
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                return cast(Char[]) _buffer[_lineStart .. _lineEnd];
            }
            else
            {
                assert(_lineStart < _lineEnd);

                /* Drop the terminator if the line has one. The last line may not. */
                size_t contentEnd = _lineEnd;
                if (_buffer[_lineEnd - 1] == terminator) contentEnd = _lineEnd - 1;
                return cast(Char[]) _buffer[_lineStart .. contentEnd];
            }
        }

        /* Note: popFront is called once at construction time to do the initial read. */
        void popFront() @safe
        {
            import std.algorithm: copy, find;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Step past the current line. */
            _lineStart = _lineEnd;

            /* Look for the next terminator in the data already buffered. 'find'
             * returns the slice starting at the terminator, or an empty slice when
             * there is none. _lineEnd becomes _dataEnd both when no terminator is
             * present and when the terminator is the last buffered byte.
             */
            auto tail = _buffer[_lineStart .. _dataEnd].find(terminator);
            _lineEnd = tail.empty ? _dataEnd : _dataEnd - tail.length + 1;

            /* Done if a complete line is buffered or there is nothing more to read. */
            if (!tail.empty || _file.eof) return;

            assert(_lineEnd == _dataEnd);

            if (_lineStart > 0)
            {
                /* Slide the partial line to the front of the buffer. */
                immutable carryLength = _dataEnd - _lineStart;
                copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. carryLength]);
                _lineStart = 0;
                _lineEnd = _dataEnd = carryLength;
            }

            /* Keep reading blocks until a terminator shows up or the file ends. */
            do
            {
                /* Ensure room for a full read, growing by whole multiples of growSize. */
                immutable freeSpace = _buffer.length - _dataEnd;
                if (freeSpace < readSize)
                {
                    immutable shortfall = readSize - freeSpace;
                    immutable growChunks = (shortfall + growSize - 1) / growSize;
                    _buffer.length += growChunks * growSize;
                }

                /* Read the next block. */
                auto justRead = _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize]);
                _dataEnd += justRead.length;

                /* Only the newly read bytes need searching. */
                tail = _buffer[_lineEnd .. _dataEnd].find(terminator);
                _lineEnd = tail.empty ? _dataEnd : _dataEnd - tail.length + 1;

            } while (tail.empty && !_file.eof);
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    auto r = new BufferedByLineImpl(file);
    r.popFront;
    return r;
}

unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {

        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters.
*/ 953 { 954 auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator); 955 auto f1bIn = file1b.File().byLine(No.keepTerminator); 956 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 957 } 958 { 959 auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator); 960 auto f1bIn = file1b.File().byLine(Yes.keepTerminator); 961 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 962 } 963 964 /* Smaller read size. This will trigger buffer growth. */ 965 { 966 auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', 512, 256); 967 auto f1bIn = file1b.File().byLine(No.keepTerminator); 968 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 969 } 970 971 /* Exercise boundary cases in buffer growth. 972 * Note: static-foreach requires DMD 2.076 / LDC 1.6 973 */ 974 static foreach (readSize; [1, 2, 4]) 975 { 976 static foreach (growSize; 1 .. readSize + 1) 977 {{ 978 auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 979 auto f1bIn = file1b.File().byLine(No.keepTerminator); 980 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 981 }} 982 static foreach (growSize; 1 .. readSize + 1) 983 {{ 984 auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 985 auto f1bIn = file1b.File().byLine(Yes.keepTerminator); 986 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 987 }} 988 } 989 990 991 /* Files that do not end in a newline. 
*/ 992 993 string file2a = buildPath(testDir, "file2a.txt"); 994 string file2b = buildPath(testDir, "file2b.txt"); 995 string file3a = buildPath(testDir, "file3a.txt"); 996 string file3b = buildPath(testDir, "file3b.txt"); 997 string file4a = buildPath(testDir, "file4a.txt"); 998 string file4b = buildPath(testDir, "file4b.txt"); 999 { 1000 file1a.File("w").write("a"); 1001 file1b.File("w").write("a"); 1002 file2a.File("w").write("ab"); 1003 file2b.File("w").write("ab"); 1004 file3a.File("w").write("abc"); 1005 file3b.File("w").write("abc"); 1006 } 1007 1008 static foreach (readSize; [1, 2, 4]) 1009 { 1010 static foreach (growSize; 1 .. readSize + 1) 1011 {{ 1012 auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1013 auto f1bIn = file1b.File().byLine(No.keepTerminator); 1014 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1015 1016 auto f2aIn = file2a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1017 auto f2bIn = file2b.File().byLine(No.keepTerminator); 1018 foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1019 1020 auto f3aIn = file3a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1021 auto f3bIn = file3b.File().byLine(No.keepTerminator); 1022 foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1023 }} 1024 static foreach (growSize; 1 .. 
readSize + 1) 1025 {{ 1026 auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1027 auto f1bIn = file1b.File().byLine(Yes.keepTerminator); 1028 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1029 1030 auto f2aIn = file2a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1031 auto f2bIn = file2b.File().byLine(Yes.keepTerminator); 1032 foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1033 1034 auto f3aIn = file3a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1035 auto f3bIn = file3b.File().byLine(Yes.keepTerminator); 1036 foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1037 }} 1038 } 1039 } 1040 1041 /** 1042 joinAppend performs a join operation on an input range, appending the results to 1043 an output range. 1044 1045 joinAppend was written as a performance enhancement over using std.algorithm.joiner 1046 or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower 1047 than std.array.join with writeln. The joiner performance may be due to interaction 1048 with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter 1049 is better, but still substantially slower than join. Using join works reasonably well, 1050 but is allocating memory unnecessarily. 1051 1052 Using joinAppend with Appender is a bit faster than join, and allocates less memory. 1053 The Appender re-uses the underlying data buffer, saving memory. The example below 1054 illustrates. It is a modification of the InputFieldReordering example. The role 1055 Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange 1056 uses a similar technique to buffer multiple lines. 1057 1058 Note: The original uses joinAppend have been replaced by BufferedOutputRange, which has 1059 its own joinAppend method. 
However, joinAppend remains useful when constructing internal
buffers where BufferedOutputRange is not appropriate.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Params:
  inputRange  = Input range supplying the values to join. Its elements are either
                single values of type E or arrays of E.
  outputRange = Output range receiving the joined values via put. Passed by ref.
  delimiter   = Value written between consecutive elements of inputRange.

Returns: outputRange, after all elements and delimiters have been put to it.
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* Parenthesized as a unit: '&&' binds tighter than '||', so without the outer
     * parentheses the isInputRange test would not guard the second alternative.
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E))))
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}

@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* The input must be left unchanged by the join. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Output is appended to what the appender already holds. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays: each array is put as a unit. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of single values: the delimiter goes between individual values. */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}

/**
getTsvFieldValue extracts the value of a single field from a delimited text string.

This is a convenience function intended for cases when only a single field from an
input line is needed. If multiple values are needed, it will be more efficient to
work directly with std.algorithm.splitter or the InputFieldReordering class.

The input text is split by a delimiter character. The specified field is converted
to the desired type and the value returned.

An exception is thrown if there are not enough fields on the line or if conversion
fails. Conversion is done with std.conv.to, it throws a std.conv.ConvException on
failure. If not enough fields, the exception text is generated referencing 1-upped
field numbers as would be provided by command line users.

Params:
  line       = The delimited input text.
  fieldIndex = Zero-based index of the field to extract.
  delim      = The field delimiter character.

Returns: The text of the requested field converted to type T.
*/
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Advance to the requested field, counting how many fields were passed over. */
    size_t atField = 0;
    for (; atField < fieldIndex && !fields.empty; ++atField) fields.popFront;

    /* Common case: the field exists. Returning directly means T does not have to be
     * default-initialized and then assigned.
     */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        /* Field numbers in the message are 1-upped, as entered by command line users. */
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, atField));
    }

    /* This is a workaround to a splitter special case - If the input is empty,
     * the returned split range is empty. This doesn't properly represent a single
     * column file. More correct mathematically, and for this case, would be a
     * single value representing an empty string. The input line is a convenient
     * source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}

@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}

/**
Field-lists - A field-list is a string entered on the command line identifying one or more
field numbers. They are used by the majority of the tsv utility applications. There are
two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
will use makeFieldListOptionHandler, it creates a delegate that can be passed to
std.getopt to process the command option. Actual processing of the option text is done by
parseFieldList. It can be called directly when the text of the option value contains more
than just the field number.

Syntax and behavior:

A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1-upped integers representing locations in an input line, in the traditional meaning of
Unix command line tools. Fields can be entered as single numbers or a range. Multiple
entries are separated by commas. Some examples (with 'fields' as the command line option):

   --fields 3                 // Single field
   --fields 4,1               // Two fields
   --fields 3-9               // A range, fields 3 to 9 inclusive
   --fields 1,2,7-34,11       // A mix of ranges and fields
   --fields 15-5,3-1          // Two ranges in reverse order.

Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
value by default, but can be enabled to support the notion of zero as representing the
entire line. However, zero cannot be part of a range. Field numbers are one-based by
default, but can be converted to zero-based. If conversion to zero-based is enabled, field
number zero must be disallowed or a signed integer type specified for the returned range.

An error is thrown if an invalid field specification is encountered. Error text is
intended for display. Error conditions include:
  - Empty fields list
  - Empty value, e.g. two consecutive commas, a trailing comma, or a leading comma
  - String that does not parse as a valid integer
  - Negative integers, or zero if zero is disallowed.
  - An incomplete range
  - Zero used as part of a range.

No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
field numbers can be entered as well. Additional restrictions need to be applied by the
caller.

Notes:
  - The data type determines the max field number that can be entered. Enabling conversion
    to zero-based restricts to the signed version of the data type.
  - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
    allowFieldNumZero template parameters.
*/

/** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
 *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
 */
alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";

/** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
 *  used by makeFieldListOptionHandler and parseFieldList.
1346 */ 1347 alias AllowFieldNumZero = Flag!"allowFieldNumZero"; 1348 1349 alias OptionHandlerDelegate = void delegate(string option, string value); 1350 1351 /** 1352 makeFieldListOptionHandler creates a std.getopt option hander for processing field lists 1353 entered on the command line. A field list is as defined by parseFieldList. 1354 */ 1355 OptionHandlerDelegate makeFieldListOptionHandler( 1356 T, 1357 ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex, 1358 AllowFieldNumZero allowZero = No.allowFieldNumZero) 1359 (ref T[] fieldsArray) 1360 if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T)) 1361 { 1362 void fieldListOptionHandler(ref T[] fieldArray, string option, string value) pure @safe 1363 { 1364 import std.algorithm : each; 1365 try value.parseFieldList!(T, convertToZero, allowZero).each!(x => fieldArray ~= x); 1366 catch (Exception exc) 1367 { 1368 import std.format : format; 1369 exc.msg = format("[--%s] %s", option, exc.msg); 1370 throw exc; 1371 } 1372 } 1373 1374 return (option, value) => fieldListOptionHandler(fieldsArray, option, value); 1375 } 1376 1377 unittest 1378 { 1379 import std.exception : assertThrown, assertNotThrown; 1380 import std.getopt; 1381 1382 { 1383 size_t[] fields; 1384 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1385 getopt(args, "f|fields", fields.makeFieldListOptionHandler); 1386 assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]); 1387 } 1388 { 1389 size_t[] fields; 1390 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1391 getopt(args, 1392 "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex)); 1393 assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]); 1394 } 1395 { 1396 size_t[] fields; 1397 auto args = ["program", "-f", "0"]; 1398 getopt(args, 1399 "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1400 assert(fields == [0]); 1401 } 1402 { 1403 size_t[] 
fields; 1404 auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"]; 1405 getopt(args, 1406 "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1407 assert(fields == [0, 1, 0, 0, 1]); 1408 } 1409 { 1410 size_t[] ints; 1411 size_t[] fields; 1412 auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"]; 1413 std.getopt.arraySep = ","; 1414 getopt(args, 1415 "i|ints", "Built-in list of integers.", &ints, 1416 "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler); 1417 assert(ints == [1, 2, 3, 4, 5, 6]); 1418 assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]); 1419 } 1420 1421 /* Basic cases involved unsinged types smaller than size_t. */ 1422 { 1423 uint[] fields; 1424 auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"]; 1425 getopt(args, 1426 "f|fields", fields.makeFieldListOptionHandler!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1427 assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]); 1428 } 1429 { 1430 ushort[] fields; 1431 auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"]; 1432 getopt(args, 1433 "f|fields", fields.makeFieldListOptionHandler!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1434 assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]); 1435 } 1436 1437 /* Basic cases involving unsigned types. 
*/ 1438 { 1439 long[] fields; 1440 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1441 getopt(args, "f|fields", fields.makeFieldListOptionHandler); 1442 assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]); 1443 } 1444 { 1445 long[] fields; 1446 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1447 getopt(args, 1448 "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex)); 1449 assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]); 1450 } 1451 { 1452 long[] fields; 1453 auto args = ["program", "-f", "0"]; 1454 getopt(args, 1455 "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1456 assert(fields == [-1]); 1457 } 1458 { 1459 int[] fields; 1460 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1461 getopt(args, "f|fields", fields.makeFieldListOptionHandler); 1462 assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]); 1463 } 1464 { 1465 int[] fields; 1466 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1467 getopt(args, 1468 "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex)); 1469 assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]); 1470 } 1471 { 1472 int[] fields; 1473 auto args = ["program", "-f", "0"]; 1474 getopt(args, 1475 "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1476 assert(fields == [-1]); 1477 } 1478 { 1479 short[] fields; 1480 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1481 getopt(args, "f|fields", fields.makeFieldListOptionHandler); 1482 assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]); 1483 } 1484 { 1485 short[] fields; 1486 auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"]; 1487 getopt(args, 1488 "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex)); 1489 assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]); 1490 } 1491 { 1492 short[] 
fields; 1493 auto args = ["program", "-f", "0"]; 1494 getopt(args, 1495 "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1496 assert(fields == [-1]); 1497 } 1498 1499 { 1500 /* Error cases. */ 1501 size_t[] fields; 1502 auto args = ["program", "-f", "0"]; 1503 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1504 1505 args = ["program", "-f", "-1"]; 1506 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1507 1508 args = ["program", "-f", "--fields", "1"]; 1509 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1510 1511 args = ["program", "-f", "a"]; 1512 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1513 1514 args = ["program", "-f", "1.5"]; 1515 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1516 1517 args = ["program", "-f", "2-"]; 1518 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1519 1520 args = ["program", "-f", "3,5,-7"]; 1521 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1522 1523 args = ["program", "-f", "3,5,"]; 1524 assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler)); 1525 1526 args = ["program", "-f", "-1"]; 1527 assertThrown(getopt(args, 1528 "f|fields", fields.makeFieldListOptionHandler!( 1529 size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero))); 1530 } 1531 } 1532 1533 /** 1534 parseFieldList lazily generates a range of fields numbers from a 'field-list' string. 

The first entry of the field-list is parsed when parseFieldList is called. Each
subsequent entry is parsed when the returned range advances to it.

Params:
  fieldList = Text of the field-list, e.g. "1,2,7-9".
  delim     = Character separating the entries of the field-list.

Returns: An input range whose elements are field numbers of type T.
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
(string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    alias parseEntry = parseFieldRange!(T, convertToZero, allowZero);

    /* Entries not yet parsed. An empty field-list is passed on as a single empty
     * entry so that it gets reported as an error by the entry parser.
     */
    auto pendingEntries = fieldList.splitter(delim);

    string firstEntry = "";
    if (!pendingEntries.empty)
    {
        firstEntry = pendingEntries.front;
        pendingEntries.popFront;
    }

    /* The values of the entry currently being iterated. */
    auto currentValues = parseEntry(firstEntry);

    /* The range state lives in the enclosing function's locals. */
    struct Result
    {
        @property bool empty() pure nothrow @safe @nogc
        {
            return currentValues.empty;
        }

        @property T front() pure @safe
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");

            return currentValues.front.to!T;
        }

        void popFront() pure @safe
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            currentValues.popFront;

            /* Stay on the current entry until it is used up. */
            if (!currentValues.empty || pendingEntries.empty) return;

            currentValues = parseEntry(pendingEntries.front);
            pendingEntries.popFront;
        }
    }

    return Result();
}

@safe unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic tests. */
    assert("1".parseFieldList.equal([1]));
    assert("1,2".parseFieldList.equal([1, 2]));
    assert("1,2,3".parseFieldList.equal([1, 2, 3]));
    assert("1-2".parseFieldList.equal([1, 2]));
    assert("1-2,6-4".parseFieldList.equal([1, 2, 6, 5, 4]));
    assert("1-2,1,1-2,2,2-1".parseFieldList.equal([1, 2, 1, 1, 2, 2, 2, 1]));
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    assert("1".parseFieldList!int.equal([1]));
    assert("1,2,3".parseFieldList!int.equal([1, 2, 3]));
    assert("1-2".parseFieldList!int.equal([1, 2]));
    assert("1-2,6-4".parseFieldList!int.equal([1, 2, 6, 5, 4]));
    assert("1-2,5".parseFieldList!int.equal([1, 2, 5]));

    /* Convert to zero tests */
    assert("1".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    assert("1".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    /* Allow zero tests. */
    assert("0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, -1, 2]));
    assert("1-2,5".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, 1, 4]));

    /* Error cases. */
    assertThrown("".parseFieldList.each);
    assertThrown(" ".parseFieldList.each);
    assertThrown(",".parseFieldList.each);
    assertThrown("5 6".parseFieldList.each);
    assertThrown(",7".parseFieldList.each);
    assertThrown("8,".parseFieldList.each);
    assertThrown("8,9,".parseFieldList.each);
    assertThrown("10,,11".parseFieldList.each);
    assertThrown("".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2-3,".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("2-,4".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2,3,,4".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(",7".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("10,0,,11".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,9,".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);

    assertThrown("0".parseFieldList.each);
    assertThrown("1,0,3".parseFieldList.each);
    assertThrown("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}

/* parseFieldRange parses a single number or number range. E.g. '5' or '5-8'. These are
 * the values in a field-list separated by a comma or other delimiter.
It returns a range
 * that iterates over all the values in the range.
 *
 * An Exception is thrown if the text is empty, is an incomplete range ('5-' or '-5'),
 * uses zero as either end of a range, contains a field number below the minimum (one,
 * or zero when allowZero is set), or if the range end cannot be represented without
 * wrap-around. Text that is not a valid integer is rejected by std.conv.to.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
(string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    /* rangeSplit[1] is the dash when present; [0] and [2] are the text on either side. */
    auto rangeSplit = findSplit(fieldRange, "-");
    immutable bool isRange = !rangeSplit[1].empty;

    if (isRange && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = isRange ? rangeSplit[2].to!S : start;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero is only valid as a stand-alone field number. Both ends of a range are
         * checked, so '6-0' is rejected as well as '0-6'.
         */
        if (isRange && (start == 0 || last == 0))
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }

        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* The iota end is one step past the last value. If that step wraps around (possible
     * when 'last' is the maximum of an int or larger type) iota would produce an empty
     * range and the entry would be silently dropped, so report it instead.
     */
    auto pastLast = last + increment;
    if ((increment > 0 && pastLast <= last) || (increment < 0 && pastLast >= last))
    {
        throw new Exception(format("Field number out of range: '%s'", fieldRange));
    }

    return iota(start, pastLast, increment);
}

@safe unittest // parseFieldRange
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic cases */
    assert(parseFieldRange("1").equal([1]));
    assert("2".parseFieldRange.equal([2]));
    assert("3-4".parseFieldRange.equal([3, 4]));
    assert("3-5".parseFieldRange.equal([3, 4, 5]));
    assert("4-3".parseFieldRange.equal([4, 3]));
    assert("10-1".parseFieldRange.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Convert to zero-based indices */
    assert(parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)("1").equal([0]));
    assert("2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([1]));
    assert("3-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3]));
    assert("3-5".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3, 4]));
    assert("4-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([3, 2]));
    assert("10-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]));

    /* Allow zero. */
    assert("0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert(parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([1]));
    assert("3-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([3, 4]));
    assert("10-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Allow zero: zero is rejected at either end of a range. */
    assertThrown("0-6".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("6-0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("6-0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("6-0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));

    /* Allow zero, convert to zero-based index.
*/ 1754 assert("0".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1755 assert(parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([0])); 1756 assert("3-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([2, 3])); 1757 assert("10-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])); 1758 1759 /* Alternate integer types. */ 1760 assert("2".parseFieldRange!uint.equal([2])); 1761 assert("3-5".parseFieldRange!uint.equal([3, 4, 5])); 1762 assert("10-1".parseFieldRange!uint.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1763 assert("2".parseFieldRange!int.equal([2])); 1764 assert("3-5".parseFieldRange!int.equal([3, 4, 5])); 1765 assert("10-1".parseFieldRange!int.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1766 assert("2".parseFieldRange!ushort.equal([2])); 1767 assert("3-5".parseFieldRange!ushort.equal([3, 4, 5])); 1768 assert("10-1".parseFieldRange!ushort.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1769 assert("2".parseFieldRange!short.equal([2])); 1770 assert("3-5".parseFieldRange!short.equal([3, 4, 5])); 1771 assert("10-1".parseFieldRange!short.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1772 1773 assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1774 assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1775 assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1776 assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1777 assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1778 assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1779 assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1780 
1781 /* Max field value cases. */ 1782 assert("65535".parseFieldRange!ushort.equal([65535])); // ushort max 1783 assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535])); 1784 assert("32767".parseFieldRange!short.equal([32767])); // short max 1785 assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767])); 1786 assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766])); 1787 1788 /* Error cases. */ 1789 assertThrown("".parseFieldRange); 1790 assertThrown(" ".parseFieldRange); 1791 assertThrown("-".parseFieldRange); 1792 assertThrown(" -".parseFieldRange); 1793 assertThrown("- ".parseFieldRange); 1794 assertThrown("1-".parseFieldRange); 1795 assertThrown("-2".parseFieldRange); 1796 assertThrown("-1".parseFieldRange); 1797 assertThrown("1.0".parseFieldRange); 1798 assertThrown("0".parseFieldRange); 1799 assertThrown("0-3".parseFieldRange); 1800 assertThrown("-2-4".parseFieldRange); 1801 assertThrown("2--4".parseFieldRange); 1802 assertThrown("2-".parseFieldRange); 1803 assertThrown("a".parseFieldRange); 1804 assertThrown("0x3".parseFieldRange); 1805 assertThrown("3U".parseFieldRange); 1806 assertThrown("1_000".parseFieldRange); 1807 assertThrown(".".parseFieldRange); 1808 1809 assertThrown("".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1810 assertThrown(" ".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1811 assertThrown("-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1812 assertThrown("1-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1813 assertThrown("-2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1814 assertThrown("-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1815 assertThrown("0".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1816 assertThrown("0-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1817 assertThrown("-2-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1818 
assertThrown("2--4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1819 1820 assertThrown("".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1821 assertThrown(" ".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1822 assertThrown("-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1823 assertThrown("1-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1824 assertThrown("-2".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1825 assertThrown("-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1826 assertThrown("0-3".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1827 assertThrown("-2-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1828 1829 assertThrown("".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1830 assertThrown(" ".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1831 assertThrown("-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1832 assertThrown("1-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1833 assertThrown("-2".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1834 assertThrown("-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1835 assertThrown("0-3".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1836 assertThrown("-2-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1837 1838 /* Value out of range cases. */ 1839 assertThrown("65536".parseFieldRange!ushort); // One more than ushort max. 1840 assertThrown("65535-65536".parseFieldRange!ushort); 1841 assertThrown("32768".parseFieldRange!short); // One more than short max. 
1842 assertThrown("32765-32768".parseFieldRange!short); 1843 // Convert to zero limits signed range. 1844 assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex)); 1845 assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766])); 1846 } 1847 1848 /** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix. 1849 * A Yes value indicates the Unix newline was already removed, as might be done via 1850 * std.File.byLine or similar mechanism. 1851 */ 1852 alias NewlineWasRemoved = Flag!"newlineWasRemoved"; 1853 1854 /** 1855 throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS 1856 line ending is found on a build compiled for a Unix platform. This is used by 1857 the TSV Utilities to detect Window/DOS line endings and terminate processing 1858 with an error message to the user. 1859 */ 1860 void throwIfWindowsNewlineOnUnix 1861 (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved) 1862 (const char[] line, const char[] filename, size_t lineNum) 1863 { 1864 version(Posix) 1865 { 1866 static if (nlWasRemoved) 1867 { 1868 immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r'; 1869 } 1870 else 1871 { 1872 immutable bool hasWindowsLineEnding = 1873 line.length > 1 && 1874 line[$ - 2] == '\r' && 1875 line[$ - 1] == '\n'; 1876 } 1877 1878 if (hasWindowsLineEnding) 1879 { 1880 import std.format; 1881 throw new Exception( 1882 format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s", 1883 (filename == "-") ? "Standard Input" : filename, lineNum)); 1884 } 1885 } 1886 } 1887 1888 @safe unittest 1889 { 1890 /* Note: Currently only building on Posix. Need to add non-Posix test cases 1891 * if Windows builds are ever done. 
1892 */ 1893 version(Posix) 1894 { 1895 import std.exception; 1896 1897 assertNotThrown(throwIfWindowsNewlineOnUnix("", "afile.tsv", 1)); 1898 assertNotThrown(throwIfWindowsNewlineOnUnix("a", "afile.tsv", 2)); 1899 assertNotThrown(throwIfWindowsNewlineOnUnix("ab", "afile.tsv", 3)); 1900 assertNotThrown(throwIfWindowsNewlineOnUnix("abc", "afile.tsv", 4)); 1901 1902 assertThrown(throwIfWindowsNewlineOnUnix("\r", "afile.tsv", 1)); 1903 assertThrown(throwIfWindowsNewlineOnUnix("a\r", "afile.tsv", 2)); 1904 assertThrown(throwIfWindowsNewlineOnUnix("ab\r", "afile.tsv", 3)); 1905 assertThrown(throwIfWindowsNewlineOnUnix("abc\r", "afile.tsv", 4)); 1906 1907 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\n", "afile.tsv", 1)); 1908 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\n", "afile.tsv", 2)); 1909 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3)); 1910 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4)); 1911 1912 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5)); 1913 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6)); 1914 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7)); 1915 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8)); 1916 1917 /* Standard Input formatting. 
*/ 1918 import std.algorithm : endsWith; 1919 bool exceptionCaught = false; 1920 1921 try (throwIfWindowsNewlineOnUnix("\r", "-", 99)); 1922 catch (Exception e) 1923 { 1924 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1925 exceptionCaught = true; 1926 } 1927 finally 1928 { 1929 assert(exceptionCaught); 1930 exceptionCaught = false; 1931 } 1932 1933 try (throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99)); 1934 catch (Exception e) 1935 { 1936 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1937 exceptionCaught = true; 1938 } 1939 finally 1940 { 1941 assert(exceptionCaught); 1942 exceptionCaught = false; 1943 } 1944 } 1945 }