/**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.

Utilities in this file:
$(LIST
    * [InputFieldReordering] - A class that creates a reordered subset of fields from
      an input line. Fields in the subset are accessed by array indices. This is
      especially useful when processing the subset in a specific order, such as the
      order listed on the command-line at run-time.

    * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
      output. Intended for use with stdout, it is a significant performance benefit.

    * [bufferedByLine] - An input range that reads from a File handle line by line.
      It is similar to the standard library method std.stdio.File.byLine, but quite a
      bit faster. This is achieved by reading in larger blocks and buffering.

    * [joinAppend] - A function that performs a join, but appending the join output to
      an output stream. It is a performance improvement over using join or joiner with
      writeln.

    * [getTsvFieldValue] - A convenience function when only a single value is needed from
      an input line.

    * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
      parsing field-lists entered on the command line.

    * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detect
      Windows newlines in input.
)

Copyright (c) 2015-2020, eBay Inc.
Initially written by Jon Degenhardt

License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
*/

module tsv_utils.common.utils;

import std.range;
import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
import std.typecons : Flag, No, Yes;

// InputFieldReordering class.

/** Flag used by the InputFieldReordering template.
*/
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;      // Reordered fields; slices of caller-owned data.
    private TupleFromTo[] fromToMap;    // Full input-index to output-index map, sorted.
    private TupleFromTo[] mapStack;     // Unprocessed tail of fromToMap for the current line.

    /** Builds the input-to-output index map.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, in the desired output order.
     *   start = Output position assigned to the first listed field; later fields get
     *           consecutive positions. With a non-zero start the first `start` entries
     *           of outputFields are never filled and stay empty.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output positions run from start to start + length - 1, so the buffer needs
         * start + length slots. Sizing it to length alone made any non-zero start
         * index past the end of the buffer in processNextField.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Sorting by 'from' lets processNextField consume the map front-to-back as
         * fields arrive in input order.
         */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* Partial lines: clear every slot so unfilled fields read as empty. */
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the outputFields
     * array. It should be called once for each field on the line, in the order found.
     *
     * Returns: The number of output slots filled by this field (zero if the field is
     * not part of the subset).
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}

/* Tests using different character types.
*/
@safe unittest
{
    import std.conv : to;

    auto rows = [["r1f0", "r1f1", "r1f2", "r1f3"],
                 ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                 ["r3f0", "123", "456", "789"]];

    size_t[] wanted = [2, 0];

    auto reordered = [["r1f2", "r1f0"],
                      ["ÀBCßßZ", "r2f0"],
                      ["456", "r3f0"]];

    /* Conversion kept from the original test; the result is not otherwise used. */
    dstring[][] reorderedAsDstrings = to!(dstring[][])(reordered);

    /* Runs the same checks for one character type: fields [2, 0] are complete
     * exactly once input field 2 has been seen, and the output matches 'reordered'.
     */
    void verify(C)()
    {
        C[][][] want = to!(C[][][])(reordered);
        auto ifr = new InputFieldReordering!C(wanted);

        foreach (rowNum, row; rows)
        {
            ifr.initNewLine;

            foreach (col, value; row)
            {
                ifr.processNextField(col, to!(C[])(value));
                assert ((col >= 2) == ifr.allFieldsFilled);
            }

            assert(ifr.allFieldsFilled);
            assert(ifr.outputFields == want[rowNum]);
        }
    }

    verify!char();
    verify!wchar();
    verify!dchar();
}

/* Test of partial line support.
*/
@safe unittest
{
    import std.conv : to;

    auto rows = [["r1f0", "r1f1", "r1f2", "r1f3"],
                 ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                 ["r3f0", "123", "456", "789"]];

    size_t[] wanted = [2, 0];

    /* Snapshot of outputFields expected after each field of each line is processed. */
    auto snapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"], ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"], ["456", "r3f0"]],
        ];

    char[][][][] want = to!(char[][][][])(snapshots);

    auto ifr = new InputFieldReordering!(char, EnablePartialLines.yes)(wanted);

    foreach (rowNum, row; rows)
    {
        ifr.initNewLine;

        foreach (col, value; row)
        {
            ifr.processNextField(col, to!(char[])(value));

            /* With partial lines enabled the buffer is usable after every field. */
            assert(ifr.outputFields == want[rowNum][col]);
        }
    }
}

/* Field combination tests.
*/
@safe unittest
{
    import std.conv : to;
    import std.stdio;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* Feeds every input line through a reordering built from fieldIndices and
     * checks the assembled output against the matching row of expectedRows.
     */
    void check(size_t[] fieldIndices, string[][] expectedRows)
    {
        auto want = to!(char[][][])(expectedRows);
        auto ifr = new InputFieldReordering!char(fieldIndices);

        foreach (rowNum, row; inputLines)
        {
            ifr.initNewLine;

            foreach (col, value; row)
            {
                ifr.processNextField(col, to!(char[])(value));
            }

            assert(ifr.outputFields == want[rowNum]);
        }
    }

    check([0], [["00"],
                ["10"],
                ["20"]]);

    check([3], [["03"],
                ["13"],
                ["23"]]);

    check([0, 1], [["00", "01"],
                   ["10", "11"],
                   ["20", "21"]]);

    check([1, 0], [["01", "00"],
                   ["11", "10"],
                   ["21", "20"]]);

    check([0, 3], [["00", "03"],
                   ["10", "13"],
                   ["20", "23"]]);

    check([3, 0], [["03", "00"],
                   ["13", "10"],
                   ["23", "20"]]);

    check([0, 1, 2, 3], [["00", "01", "02", "03"],
                         ["10", "11", "12", "13"],
                         ["20", "21", "22", "23"]]);

    check([3, 2, 1, 0], [["03", "02", "01", "00"],
                         ["13", "12", "11", "10"],
                         ["23", "22", "21", "20"]]);

    check([0, 3, 0, 0, 1], [["00", "03", "00", "00", "01"],
                            ["10", "13", "10", "10", "11"],
                            ["20", "23", "20", "20", "21"]]);
}


import std.stdio : File, isFileHandle, KeepTerminator;
import std.range : isOutputRange;
import std.traits : Unqual;

/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case.
BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.


BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered wrapper around outputTarget.
     *
     * Params:
     *   outputTarget = The File or output range receiving flushed data.
     *   flushSize = Buffer length at which newline-boundary writes trigger a flush.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which a flush happens regardless of newlines.
     *             Must not be smaller than flushSize.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    @safe
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;

        /* Keeps the invariant _flushSize <= _maxSize when the assert is compiled out. */
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /* Flushes whatever is still buffered when the object goes out of scope. */
    ~this() @safe
    {
        flush();
    }

    /** Writes the internal buffer to the output target and clears the buffer. */
    void flush() @safe
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull() @safe
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize() @safe
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size. An empty buffer is never
     * flushed here. Returns true if a flush occurred.
     */
    private bool maybeFlush() @safe
    {
        immutable size_t bufferedLength = _outputBuffer.data.length;

        /* The emptiness test must come before the [$-1] access: with a flushSize of
         * zero an empty buffer already satisfies the size test, and indexing its last
         * element would be out of bounds (e.g. after append("")).
         */
        immutable bool doFlush =
            bufferedLength > 0 &&
            bufferedLength >= _flushSize &&
            (_outputBuffer.data[$-1] == '\n' || bufferedLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* Appends to the internal buffer without any flush check. */
    private void appendRaw(T)(T stuff) pure @safe
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends stuff to the internal buffer, then flushes per the maybeFlush rules. */
    void append(T)(T stuff) @safe
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline and flushes if flushSize has been reached.
     * Returns true if a flush occurred.
     */
    bool appendln() @safe
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline. Returns true if a flush occurred. */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* First element goes in bare; every later element is preceded by the delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range. */
    void put(T)(T stuff)
    {
        import std.traits;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}

unittest
{
    import tsv_utils.common.unittest_utils;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : map, joiner;
    import std.range : iota;
    import std.conv : to;

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        ostream.append("file1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln(100.to!string);
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and no flush at every line. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        ostream.append("file2: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer.
Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            import std.stdio : File;

            auto ltw = filepath3.File("w").lockingTextWriter;
            {
                auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
                ostream.append("file3: ");
                ostream.append("abc");
                ostream.append(["def", "ghi", "jkl"]);
                ostream.appendln("100");
                ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
                ostream.appendln();
            }
        }
        assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
    }

    /* With an Appender. */
    import std.array : appender;
    auto app1 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1))(app1);
        ostream.append("appender1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
        bool wasFlushed = false;

        assert(app2.data == "");

        ostream.append("12345678"); // Not flushed yet.
        assert(app2.data == "");

        wasFlushed = ostream.appendln;  // Ninth char, not flushed yet.
        assert(!wasFlushed);
        assert(app2.data == "");

        wasFlushed = ostream.appendln;  // Tenth char, now flushed.
        assert(wasFlushed);
        assert(app2.data == "12345678\n\n");

        /* Reset the target so the next checks see only newly flushed data. */
        app2.clear;
        assert(app2.data == "");

        ostream.append("12345678");

        /* Eight chars is below the flush size of 10, so flushIfFull does nothing. */
        wasFlushed = ostream.flushIfFull;
        assert(!wasFlushed);
        assert(app2.data == "");

        /* An explicit flush writes regardless of size. */
        ostream.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        /* Past the flush size but no trailing newline, so append does not flush;
         * the data is written when ostream goes out of scope.
         */
        ostream.append("123456789012345");
        assert(app2.data == "");
    }
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
        ostream.append("appenderB: ");
        ostream.joinAppend(["a", "bc", "def"], '-');
        ostream.append(':');
        ostream.joinAppend(["g", "hi", "jkl"], '-');
        ostream.appendln("*100*");
        ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        ostream.append(' ');
        ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        ostream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    /* Writes "123\n56789\n" through a range received by reference. */
    void outputStuffAsRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    /* Writes "123\n56789\n" through a range received by value (a copy). */
    void outputStuffAsVal(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);

        /* First call buffers 10 chars, below the flush size of 12: nothing written. */
        outputStuffAsRef(ostream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");

        /* Second call crosses the flush size at the first newline boundary. */
        outputStuffAsRef(ostream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);

        /* By-value: the data is already in the target after each call returns. */
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        ostream.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        ostream.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        ostream.put("1234567");     // Not flushed yet (no newline).
assert(app5.data == "123456789012");

        ostream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}

/**
bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
reading a large buffer from the input stream rather than just a single line.

The file argument needs to be a File object open for reading, typically a filesystem
file or standard input. Use the Yes.keepTerminator template parameter to keep the
newline. This is similar to stdio.File.byLine, except specified as a template parameter
rather than a runtime parameter.

Reading in blocks does mean that input is not read until a full buffer is available or
end-of-file is reached. For this reason, bufferedByLine is not appropriate for
interactive input.

Template parameters: keepTerminator selects whether front includes the terminator;
Char is the element type of the returned lines (char or ubyte); terminator is the
line-ending byte; readSize is the number of bytes requested per read; growSize is the
increment used when enlarging the buffer (0 < growSize <= readSize).

The returned range is a class instance, and front returns a slice of its internal
buffer. popFront may move data within that buffer or reallocate it, so a line should
be used or copied before advancing.
*/

auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer state variables
         *   - _buffer.length - Full length of allocated buffer.
         *   - _dataEnd - End of currently valid data (end of last read).
         *   - _lineStart - Start of current line.
         *   - _lineEnd - End of current line.
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        /* Stores the file and allocates the initial buffer. No data is read here;
         * the first read happens in popFront.
         */
        this (File f) @safe
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        /* The range is empty once the file reports eof and every buffered byte has
         * been consumed.
         */
        bool empty() const pure @safe
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        /* Returns the current line as a slice of the internal buffer. */
        Char[] front() pure @safe
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                return cast(Char[]) _buffer[_lineStart .. _lineEnd];
            }
            else
            {
                assert(_lineStart < _lineEnd);

                /* Drop the terminator if present. The last line of a file may not
                 * have one.
                 */
                immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
                return cast(Char[]) _buffer[_lineStart .. end];
            }
        }

        /* Note: Call popFront at initialization to do the initial read. */
        void popFront() @safe
        {
            import std.algorithm: copy, find;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Pop the current line. */
            _lineStart = _lineEnd;

            /* Set up the next line if more data is available, either in the buffer or
             * the file. The next line ends at the next newline, if there is one.
             *
             * Notes:
             * - 'find' returns the slice starting with the character searched for, or
             *   an empty range if not found.
             * - _lineEnd is set to _dataEnd both when the current buffer does not have
             *   a newline and when it ends with one.
             */
            auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
            _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

            if (found.empty && !_file.eof)
            {
                /* No newline in current buffer. Read from the file until the next
                 * newline is found.
                 */
                assert(_lineEnd == _dataEnd);

                if (_lineStart > 0)
                {
                    /* Move remaining data to the start of the buffer. */
                    immutable remainingLength = _dataEnd - _lineStart;
                    copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
                    _lineStart = 0;
                    _lineEnd = _dataEnd = remainingLength;
                }

                do
                {
                    /* Grow the buffer if necessary. */
                    immutable availableSize = _buffer.length - _dataEnd;
                    if (availableSize < readSize)
                    {
                        /* Grow in growSize steps until a full readSize block fits. */
                        size_t growBy = growSize;
                        while (availableSize + growBy < readSize) growBy += growSize;
                        _buffer.length += growBy;
                    }

                    /* Read the next block. */
                    _dataEnd +=
                        _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
                        .length;

                    /* Only the newly read bytes are searched: _lineEnd still marks
                     * the end of the data that was already scanned.
                     */
                    found = _buffer[_lineEnd .. _dataEnd].find(terminator);
                    _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

                } while (found.empty && !_file.eof);
            }
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    /* The initial popFront performs the first read so that front is valid. */
    auto r = new BufferedByLineImpl(file);
    r.popFront;
    return r;
}

unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    /* Test data: 1000 empty lines, 1000 one-char lines, the numbers 1 to 1000, then
     * lines of 'x' growing from length 1 to 1000.
     */
    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {

        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters.
*/
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', 512, 256);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        /* Every growSize from 1 up to readSize, without the terminator. */
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
        /* The same combinations, keeping the terminator. */
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
    }


    /* Files that do not end in a newline. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    string file4a = buildPath(testDir, "file4a.txt");
    string file4b = buildPath(testDir, "file4b.txt");
    {
        /* file1a/file1b are overwritten here with one-character content. The
         * file4a/file4b paths are declared above but never written or read.
         */
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f2aIn = file2a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f2bIn = file2b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f3aIn = file3a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f3bIn = file3b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f2aIn = file2a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f2bIn = file2b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f3aIn = file3a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f3bIn = file3b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
    }
}

/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses joinAppend have been replaced by BufferedOutputRange, which has
its own joinAppend method.
However, joinAppend remains useful when constructing internal
buffers where BufferedOutputRange is not appropriate.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, '\t').data);
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Params:
    inputRange  = The elements to join. Each element is either a single E or an array of E.
    outputRange = The output range the joined result is appended to. Existing content
                  is not cleared.
    delimiter   = The value written between consecutive elements.

Returns: outputRange, after the joined elements have been appended.
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* The two element forms are alternatives; both require an input range. The extra
     * parentheses are needed because '&&' binds tighter than '||'.
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
   )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}

@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}

/**
getTsvFieldValue extracts the value of a single field from a delimited text string.

This is a convenience function intended for cases when only a single field from an
input line is needed. If multiple values are needed, it will be more efficient to
work directly with std.algorithm.splitter or the InputFieldReordering class.

The input text is split by a delimiter character. The specified field is converted
to the desired type and the value returned.

An exception is thrown if there are not enough fields on the line or if conversion
fails. Conversion is done with std.conv.to, it throws a std.conv.ConvException on
failure. If not enough fields, the exception text is generated referencing 1-upped
field numbers as would be provided by command line users.
*/
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step over the fields that precede the requested one, counting how many were seen. */
    size_t numSkipped = 0;
    for (; numSkipped < fieldIndex && !fields.empty; ++numSkipped) fields.popFront;

    /* Usual case: the requested field is present. */
    if (!fields.empty) return fields.front.to!T;

    /* The line ran out of fields. Report using 1-upped field numbers. */
    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero requested, but the split range is empty. This is a workaround to a
     * splitter special case - If the input is empty, the returned split range is empty.
     * This doesn't properly represent a single column file. More correct mathematically,
     * and for this case, would be a single value representing an empty string. The input
     * line is a convenient source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}

@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}

/**
Field-lists - A field-list is a string entered on the command line identifying one or more
field numbers. They are used by the majority of the tsv utility applications. There are
two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
will use makeFieldListOptionHandler, it creates a delegate that can be passed to
std.getopt to process the command option. Actual processing of the option text is done by
parseFieldList. It can be called directly when the text of the option value contains more
than just the field number.

Syntax and behavior:

A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1-upped integers representing locations in an input line, in the traditional meaning of
Unix command line tools. Fields can be entered as single numbers or a range. Multiple
entries are separated by commas. Some examples (with 'fields' as the command line option):

   --fields 3                 // Single field
   --fields 4,1               // Two fields
   --fields 3-9               // A range, fields 3 to 9 inclusive
   --fields 1,2,7-34,11       // A mix of ranges and fields
   --fields 15-5,3-1          // Two ranges in reverse order.

Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
value by default, but can be enabled to support the notion of zero as representing the
entire line. However, zero cannot be part of a range. Field numbers are one-based by
default, but can be converted to zero-based. If conversion to zero-based is enabled, field
number zero must be disallowed or a signed integer type specified for the returned range.

An error is thrown if an invalid field specification is encountered. Error text is
intended for display. Error conditions include:
  - Empty fields list
  - Empty value, e.g. Two consecutive commas, a trailing comma, or a leading comma
  - String that does not parse as a valid integer
  - Negative integers, or zero if zero is disallowed.
  - An incomplete range
  - Zero used as part of a range.

No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
field numbers can be entered as well. Additional restrictions need to be applied by the
caller.

Notes:
  - The data type determines the max field number that can be entered. Enabling conversion
    to zero restricts to the signed version of the data type.
  - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
    allowFieldNumZero template parameters.
*/

/** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
 *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
 */
alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";

/** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
 *  used by makeFieldListOptionHandler and parseFieldList.
*/
alias AllowFieldNumZero = Flag!"allowFieldNumZero";

/** Signature of the option handler delegate produced by makeFieldListOptionHandler. It
 *  receives the option name and the option's text value.
 */
alias OptionHandlerDelegate = void delegate(string option, string value);

/**
makeFieldListOptionHandler creates a std.getopt option handler for processing field lists
entered on the command line. A field list is as defined by parseFieldList.

The returned delegate parses the option value with parseFieldList and appends each field
number to fieldsArray. If parsing throws, the exception message is prefixed with the
option name ("[--option] ") and the exception is rethrown.

Params:
    fieldsArray = The array that parsed field numbers are appended to. It is taken by
                  reference; each invocation of the delegate appends to it.

Returns: A delegate suitable for passing to std.getopt as an option receiver.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
    T,
    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    void fieldListOptionHandler(ref T[] fieldArray, string option, string value) pure @safe
    {
        import std.algorithm : each;
        try value.parseFieldList!(T, convertToZero, allowZero).each!(x => fieldArray ~= x);
        catch (Exception exc)
        {
            /* Identify the offending command line option in the error text. */
            import std.format : format;
            exc.msg = format("[--%s] %s", option, exc.msg);
            throw exc;
        }
    }

    return (option, value) => fieldListOptionHandler(fieldsArray, option, value);
}

unittest
{
    import std.exception : assertThrown, assertNotThrown;
    import std.getopt;

    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];

        /* arraySep is a std.getopt module-level setting. Restore it when this block
         * exits so the change does not leak into other tests.
         */
        immutable savedArraySep = std.getopt.arraySep;
        scope(exit) std.getopt.arraySep = savedArraySep;
        std.getopt.arraySep = ",";

        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    {
        uint[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }
    {
        ushort[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }

    /* Basic cases involving signed types. */
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        long[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        int[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        short[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }

    {
        /* Error cases. */
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "-1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "--fields", "1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "a"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "1.5"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "2-"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "3,5,-7"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "3,5,"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "-1"];
        assertThrown(getopt(args,
                            "f|fields", fields.makeFieldListOptionHandler!(
                                size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
    }
}

/**
parseFieldList lazily generates a range of field numbers from a 'field-list' string.
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    /* Entries of the field-list that have not been expanded yet. */
    auto pendingEntries = fieldList.splitter(delim);

    /* Expand the first entry eagerly. An empty field-list is passed to parseFieldRange as
     * an empty string, which parseFieldRange rejects with an exception.
     */
    string firstEntry = "";
    if (!pendingEntries.empty) firstEntry = pendingEntries.front;
    auto activeEntry = firstEntry.parseFieldRange!(T, convertToZero, allowZero);
    if (!pendingEntries.empty) pendingEntries.popFront;

    /* The returned input range. It walks the values of the entry being expanded and
     * moves to the next entry when the current one is used up.
     */
    struct Result
    {
        @property bool empty() pure nothrow @safe @nogc
        {
            return activeEntry.empty;
        }

        @property T front() pure @safe
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");
            assert(!activeEntry.empty, "Internal error. Call to front with an empty _currFieldParse.");

            return activeEntry.front.to!T;
        }

        void popFront() pure @safe
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            activeEntry.popFront;

            /* Nothing more to do unless the current entry is used up and another remains. */
            if (!activeEntry.empty || pendingEntries.empty) return;

            activeEntry = pendingEntries.front.parseFieldRange!(T, convertToZero, allowZero);
            pendingEntries.popFront;
        }
    }

    return Result();
}

@safe unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic tests. */
    assert("1".parseFieldList.equal([1]));
    assert("1,2".parseFieldList.equal([1, 2]));
    assert("1,2,3".parseFieldList.equal([1, 2, 3]));
    assert("1-2".parseFieldList.equal([1, 2]));
    assert("1-2,6-4".parseFieldList.equal([1, 2, 6, 5, 4]));
    assert("1-2,1,1-2,2,2-1".parseFieldList.equal([1, 2, 1, 1, 2, 2, 2, 1]));
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    assert("1".parseFieldList!int.equal([1]));
    assert("1,2,3".parseFieldList!int.equal([1, 2, 3]));
    assert("1-2".parseFieldList!int.equal([1, 2]));
    assert("1-2,6-4".parseFieldList!int.equal([1, 2, 6, 5, 4]));
    assert("1-2,5".parseFieldList!int.equal([1, 2, 5]));

    /* Convert to zero tests */
    assert("1".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    assert("1".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    /* Allow zero tests. */
    assert("0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, -1, 2]));
    assert("1-2,5".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, 1, 4]));

    /* Error cases. */
    assertThrown("".parseFieldList.each);
    assertThrown(" ".parseFieldList.each);
    assertThrown(",".parseFieldList.each);
    assertThrown("5 6".parseFieldList.each);
    assertThrown(",7".parseFieldList.each);
    assertThrown("8,".parseFieldList.each);
    assertThrown("8,9,".parseFieldList.each);
    assertThrown("10,,11".parseFieldList.each);
    assertThrown("".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2-3,".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("2-,4".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2,3,,4".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(",7".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("10,0,,11".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,9,".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);

    assertThrown("0".parseFieldList.each);
    assertThrown("1,0,3".parseFieldList.each);
    assertThrown("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}

/* parseFieldRange parses a single number or number range. E.g. '5' or '5-8'. These are
 * the values in a field-list separated by a comma or other delimiter.
It returns a range
 * that iterates over all the values in the range.
 *
 * An Exception is thrown for an empty string, for an incomplete range ('5-' or '-5'),
 * for a value rejected by the zero/negative checks below, and for a range that has zero
 * at either end. Text that is not a valid integer fails in std.conv.to.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    auto rangeSplit = findSplit(fieldRange, "-");

    if (!rangeSplit[1].empty && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = rangeSplit[1].empty ? start : rangeSplit[2].to!S;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero is only valid as a stand-alone field number, so reject it at either end
         * of a range. Checking 'last' matters: for unsigned types a range ending in zero
         * makes 'last + increment' wrap around, and iota then yields an empty range.
         */
        if ((start == 0 || last == 0) && !rangeSplit[1].empty)
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }

        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* NOTE(review): for 32 and 64 bit types it looks like a 'last' equal to the type's
     * maximum value makes 'last + increment' wrap, giving an empty range. Not addressed
     * here - confirm whether such field numbers need to be supported.
     */
    return iota(start, last + increment, increment);
}

@safe unittest // parseFieldRange
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic cases */
    assert(parseFieldRange("1").equal([1]));
    assert("2".parseFieldRange.equal([2]));
    assert("3-4".parseFieldRange.equal([3, 4]));
    assert("3-5".parseFieldRange.equal([3, 4, 5]));
    assert("4-3".parseFieldRange.equal([4, 3]));
    assert("10-1".parseFieldRange.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Convert to zero-based indices */
    assert(parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)("1").equal([0]));
    assert("2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([1]));
    assert("3-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3]));
    assert("3-5".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3, 4]));
    assert("4-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([3, 2]));
    assert("10-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]));

    /* Allow zero. */
    assert("0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert(parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([1]));
    assert("3-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([3, 4]));
    assert("10-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Zero is rejected at either end of a range, even when zero is allowed. */
    assertThrown("0-3".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("3-0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("3-0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("3-0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));

    /* Allow zero, convert to zero-based index.
*/ 1743 assert("0".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1744 assert(parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([0])); 1745 assert("3-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([2, 3])); 1746 assert("10-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])); 1747 1748 /* Alternate integer types. */ 1749 assert("2".parseFieldRange!uint.equal([2])); 1750 assert("3-5".parseFieldRange!uint.equal([3, 4, 5])); 1751 assert("10-1".parseFieldRange!uint.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1752 assert("2".parseFieldRange!int.equal([2])); 1753 assert("3-5".parseFieldRange!int.equal([3, 4, 5])); 1754 assert("10-1".parseFieldRange!int.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1755 assert("2".parseFieldRange!ushort.equal([2])); 1756 assert("3-5".parseFieldRange!ushort.equal([3, 4, 5])); 1757 assert("10-1".parseFieldRange!ushort.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1758 assert("2".parseFieldRange!short.equal([2])); 1759 assert("3-5".parseFieldRange!short.equal([3, 4, 5])); 1760 assert("10-1".parseFieldRange!short.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1761 1762 assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1763 assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1764 assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1765 assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1766 assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1767 assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1768 assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1769 
1770 /* Max field value cases. */ 1771 assert("65535".parseFieldRange!ushort.equal([65535])); // ushort max 1772 assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535])); 1773 assert("32767".parseFieldRange!short.equal([32767])); // short max 1774 assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767])); 1775 assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766])); 1776 1777 /* Error cases. */ 1778 assertThrown("".parseFieldRange); 1779 assertThrown(" ".parseFieldRange); 1780 assertThrown("-".parseFieldRange); 1781 assertThrown(" -".parseFieldRange); 1782 assertThrown("- ".parseFieldRange); 1783 assertThrown("1-".parseFieldRange); 1784 assertThrown("-2".parseFieldRange); 1785 assertThrown("-1".parseFieldRange); 1786 assertThrown("1.0".parseFieldRange); 1787 assertThrown("0".parseFieldRange); 1788 assertThrown("0-3".parseFieldRange); 1789 assertThrown("-2-4".parseFieldRange); 1790 assertThrown("2--4".parseFieldRange); 1791 assertThrown("2-".parseFieldRange); 1792 assertThrown("a".parseFieldRange); 1793 assertThrown("0x3".parseFieldRange); 1794 assertThrown("3U".parseFieldRange); 1795 assertThrown("1_000".parseFieldRange); 1796 assertThrown(".".parseFieldRange); 1797 1798 assertThrown("".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1799 assertThrown(" ".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1800 assertThrown("-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1801 assertThrown("1-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1802 assertThrown("-2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1803 assertThrown("-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1804 assertThrown("0".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1805 assertThrown("0-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1806 assertThrown("-2-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1807 
assertThrown("2--4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1808 1809 assertThrown("".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1810 assertThrown(" ".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1811 assertThrown("-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1812 assertThrown("1-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1813 assertThrown("-2".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1814 assertThrown("-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1815 assertThrown("0-3".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1816 assertThrown("-2-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1817 1818 assertThrown("".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1819 assertThrown(" ".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1820 assertThrown("-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1821 assertThrown("1-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1822 assertThrown("-2".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1823 assertThrown("-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1824 assertThrown("0-3".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1825 assertThrown("-2-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1826 1827 /* Value out of range cases. */ 1828 assertThrown("65536".parseFieldRange!ushort); // One more than ushort max. 1829 assertThrown("65535-65536".parseFieldRange!ushort); 1830 assertThrown("32768".parseFieldRange!short); // One more than short max. 
1831 assertThrown("32765-32768".parseFieldRange!short); 1832 // Convert to zero limits signed range. 1833 assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex)); 1834 assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766])); 1835 } 1836 1837 /** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix. 1838 * A Yes value indicates the Unix newline was already removed, as might be done via 1839 * std.File.byLine or similar mechanism. 1840 */ 1841 alias NewlineWasRemoved = Flag!"newlineWasRemoved"; 1842 1843 /** 1844 throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS 1845 line ending is found on a build compiled for a Unix platform. This is used by 1846 the TSV Utilities to detect Window/DOS line endings and terminate processing 1847 with an error message to the user. 1848 */ 1849 void throwIfWindowsNewlineOnUnix 1850 (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved) 1851 (const char[] line, const char[] filename, size_t lineNum) 1852 { 1853 version(Posix) 1854 { 1855 static if (nlWasRemoved) 1856 { 1857 immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r'; 1858 } 1859 else 1860 { 1861 immutable bool hasWindowsLineEnding = 1862 line.length > 1 && 1863 line[$ - 2] == '\r' && 1864 line[$ - 1] == '\n'; 1865 } 1866 1867 if (hasWindowsLineEnding) 1868 { 1869 import std.format; 1870 throw new Exception( 1871 format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s", 1872 (filename == "-") ? "Standard Input" : filename, lineNum)); 1873 } 1874 } 1875 } 1876 1877 @safe unittest 1878 { 1879 /* Note: Currently only building on Posix. Need to add non-Posix test cases 1880 * if Windows builds are ever done. 
1881 */ 1882 version(Posix) 1883 { 1884 import std.exception; 1885 1886 assertNotThrown(throwIfWindowsNewlineOnUnix("", "afile.tsv", 1)); 1887 assertNotThrown(throwIfWindowsNewlineOnUnix("a", "afile.tsv", 2)); 1888 assertNotThrown(throwIfWindowsNewlineOnUnix("ab", "afile.tsv", 3)); 1889 assertNotThrown(throwIfWindowsNewlineOnUnix("abc", "afile.tsv", 4)); 1890 1891 assertThrown(throwIfWindowsNewlineOnUnix("\r", "afile.tsv", 1)); 1892 assertThrown(throwIfWindowsNewlineOnUnix("a\r", "afile.tsv", 2)); 1893 assertThrown(throwIfWindowsNewlineOnUnix("ab\r", "afile.tsv", 3)); 1894 assertThrown(throwIfWindowsNewlineOnUnix("abc\r", "afile.tsv", 4)); 1895 1896 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\n", "afile.tsv", 1)); 1897 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\n", "afile.tsv", 2)); 1898 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3)); 1899 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4)); 1900 1901 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5)); 1902 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6)); 1903 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7)); 1904 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8)); 1905 1906 /* Standard Input formatting. 
*/ 1907 import std.algorithm : endsWith; 1908 bool exceptionCaught = false; 1909 1910 try (throwIfWindowsNewlineOnUnix("\r", "-", 99)); 1911 catch (Exception e) 1912 { 1913 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1914 exceptionCaught = true; 1915 } 1916 finally 1917 { 1918 assert(exceptionCaught); 1919 exceptionCaught = false; 1920 } 1921 1922 try (throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99)); 1923 catch (Exception e) 1924 { 1925 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1926 exceptionCaught = true; 1927 } 1928 finally 1929 { 1930 assert(exceptionCaught); 1931 exceptionCaught = false; 1932 } 1933 } 1934 }