/**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.

Utilities in this file:
$(LIST
    * [InputFieldReordering] - A class that creates a reordered subset of fields from
      an input line. Fields in the subset are accessed by array indices. This is
      especially useful when processing the subset in a specific order, such as the
      order listed on the command-line at run-time.

    * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
      output. Intended for use with stdout, it is a significant performance benefit.

    * [bufferedByLine] - An input range that reads from a File handle line by line.
      It is similar to the standard library method std.stdio.File.byLine, but quite a
      bit faster. This is achieved by reading in larger blocks and buffering.

    * [joinAppend] - A function that performs a join, but appends the join output to
      an output stream. It is a performance improvement over using join or joiner with
      writeln.

    * [getTsvFieldValue] - A convenience function when only a single value is needed from
      an input line.

    * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
      parsing field-lists entered on the command line.

    * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detect
      Windows newlines in input.
)

Copyright (c) 2015-2019, eBay Software Foundation
Initially written by Jon Degenhardt

License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
*/

module tsv_utils.common.utils;

import std.range;
import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
import std.typecons : Flag, No, Yes;

// InputFieldReordering class.

/** Flag used by the InputFieldReordering template.
*/
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;
    private TupleFromTo[] fromToMap;
    private TupleFromTo[] mapStack;

    /** Builds the input-index to output-slot map.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, in the order they should
     *                        appear in the output. A field may be listed more than once.
     *   start = Offset added to every output slot index. The first listed field is
     *           written to slot `start`, the next to `start + 1`, etc.
     *
     * The output buffer is sized `start + inputFieldIndicies.length` so that every slot
     * produced by the offset exists. (Sizing it with only the number of fields caused
     * an out-of-bounds index in processNextField whenever `start` was non-zero.) The
     * leading `start` slots are never written by this class.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Tuples compare by 'from' first, so the map ends up in input field order. */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. The full map becomes pending
     *  again. When partial lines are enabled every output slot is reset to empty.
     */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the outputFields
     *  array. It should be called once for each field on the line, in the order found.
     *
     *  Returns: The number of output slots filled by this field (zero if the field is
     *  not part of the subset).
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;

        /* The map is sorted by input index, so all entries for this field are adjacent
         * at the front of the pending map.
         */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     *  it is only valid after allFieldsFilled is true. The returned slices reference the
     *  field values passed to processNextField; they are not copies.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}

/* Tests using different character types.
*/
unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"],
                       ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                       ["r3f0", "123", "456", "789"]];

    size_t[] fields_2_0 = [2, 0];

    auto expected_2_0 = [["r1f2", "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456", "r3f0"]];

    /* Conversion to immutable dchar strings; only the conversion itself is exercised. */
    dstring[][] dstringExpected_2_0 = to!(dstring[][])(expected_2_0);

    /* Runs the same reordering scenario for one character type. Fields 2 and 0 are
     * requested, so the reordering is complete once input field 2 has been seen.
     */
    static void checkCharType(C)(string[][] lines, ref size_t[] fieldIndices, string[][] expectedStrings)
    {
        C[][][] expected = to!(C[][][])(expectedStrings);
        auto reordering = new InputFieldReordering!C(fieldIndices);

        foreach (lineIndex, line; lines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(C[])(fieldValue));
                assert ((fieldIndex >= 2) == reordering.allFieldsFilled);
            }

            assert(reordering.allFieldsFilled);
            assert(reordering.outputFields == expected[lineIndex]);
        }
    }

    checkCharType!char(inputLines, fields_2_0, expected_2_0);
    checkCharType!wchar(inputLines, fields_2_0, expected_2_0);
    checkCharType!dchar(inputLines, fields_2_0, expected_2_0);
}

/* Test of partial line support.
*/
unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"],
                       ["r2f0", "abc", "ÀBCßßZ", "ghi"],
                       ["r3f0", "123", "456", "789"]];

    size_t[] fields_2_0 = [2, 0];

    /* Snapshot of the output fields expected after each field of each line has been
     * processed. Slots not yet filled are empty strings.
     */
    auto expectedSnapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"], ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"], ["456", "r3f0"]],
        ].to!(char[][][][]);

    auto partialIFR = new InputFieldReordering!(char, EnablePartialLines.yes)(fields_2_0);

    foreach (lineNum, fields; inputLines)
    {
        partialIFR.initNewLine;

        foreach (fieldNum, value; fields)
        {
            partialIFR.processNextField(fieldNum, value.to!(char[]));
            assert(partialIFR.outputFields == expectedSnapshots[lineNum][fieldNum]);
        }
    }
}

/* Field combination tests.
*/
unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* Field lists under test. Entry N here pairs with entry N of expectedByCase. */
    size_t[][] fieldLists =
        [
            [0],
            [3],
            [0, 1],
            [1, 0],
            [0, 3],
            [3, 0],
            [0, 1, 2, 3],
            [3, 2, 1, 0],
            [0, 3, 0, 0, 1],
        ];

    /* For each field list, the reordered output expected for each input line. */
    auto expectedByCase = to!(char[][][][])(
        [
            [["00"],
             ["10"],
             ["20"]],

            [["03"],
             ["13"],
             ["23"]],

            [["00", "01"],
             ["10", "11"],
             ["20", "21"]],

            [["01", "00"],
             ["11", "10"],
             ["21", "20"]],

            [["00", "03"],
             ["10", "13"],
             ["20", "23"]],

            [["03", "00"],
             ["13", "10"],
             ["23", "20"]],

            [["00", "01", "02", "03"],
             ["10", "11", "12", "13"],
             ["20", "21", "22", "23"]],

            [["03", "02", "01", "00"],
             ["13", "12", "11", "10"],
             ["23", "22", "21", "20"]],

            [["00", "03", "00", "00", "01"],
             ["10", "13", "10", "10", "11"],
             ["20", "23", "20", "20", "21"]],
        ]);

    foreach (caseIndex, ref fieldList; fieldLists)
    {
        auto reordering = new InputFieldReordering!char(fieldList);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            assert(reordering.outputFields == expectedByCase[caseIndex][lineIndex]);
        }
    }
}


import std.stdio : File, isFileHandle, KeepTerminator;
import std.range : isOutputRange;
import std.traits : Unqual;

/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case.
BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.


BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;
    import std.format : format;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered writer in front of outputTarget.
     *
     * Params:
     *   outputTarget = The File or output range receiving the buffered data.
     *   flushSize = Buffer length at which newline-terminated writes trigger a flush.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which a flush occurs regardless of newlines. Expected
     *             to be at least flushSize; if it is not (and asserts are disabled),
     *             flushSize is used instead.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Flushes any buffered data when the object goes out of scope. */
    ~this()
    {
        flush();
    }

    /** Writes the internal buffer to the output target and clears the buffer.
     *
     * Nothing is written when the buffer is empty. This keeps the destructor from
     * touching the output target when there is no data to deliver.
     */
    void flush()
    {
        if (_outputBuffer.data.length == 0) return;

        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. The explicit length check is needed because a
     * flushSize of zero would otherwise lead to reading the last element of an empty
     * buffer.
     */
    private bool maybeFlush()
    {
        immutable size_t bufferedLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferedLength > 0 &&
            bufferedLength >= _flushSize &&
            (_outputBuffer.data[$-1] == '\n' || bufferedLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* Appends to the internal buffer without any flush checks. */
    private void appendRaw(T)(T stuff)
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer. Flushes per the maybeFlush rules. */
    void append(T)(T stuff)
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     *  Returns true if a flush occurred.
     */
    bool appendln()
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if flushSize has been reached.
     *  Returns true if a flush occurred.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range. A lone newline, '\n' or "\n", acts as appendln(). */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}

unittest
{
    import tsv_utils.common.unittest_utils;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : map, joiner;
    import std.range : iota;
    import std.conv : to;

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        ostream.append("file1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln(100.to!string);
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and no flush at every line. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        ostream.append("file2: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer.
Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            import std.stdio : File;

            auto ltw = filepath3.File("w").lockingTextWriter;
            {
                auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
                ostream.append("file3: ");
                ostream.append("abc");
                ostream.append(["def", "ghi", "jkl"]);
                ostream.appendln("100");
                ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
                ostream.appendln();
            }
        }
        assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
    }

    /* Appender as the output target. */
    import std.array : appender;
    auto app1 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1))(app1);
        ostream.append("appender1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Appender target again, this time verifying when flushes happen. */
    auto app2 = appender!(char[]);
    {
        /* Flush size is 10, nothing is reserved up front. */
        auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0);
        bool flushed = false;

        assert(app2.data.length == 0);

        /* Eight chars: below the flush size. */
        ostream.append("12345678");
        assert(app2.data.length == 0);

        /* Ninth char: still below the flush size. */
        flushed = ostream.appendln;
        assert(!flushed);
        assert(app2.data.length == 0);

        /* Tenth char: flush size reached at a newline. */
        flushed = ostream.appendln;
        assert(flushed);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data.length == 0);

        ostream.append("12345678");

        flushed = ostream.flushIfFull;
        assert(!flushed);
        assert(app2.data.length == 0);

        ostream.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data.length == 0);

        /* Past the flush size, but no trailing newline and well below max size. */
        ostream.append("123456789012345");
        assert(app2.data.length == 0);
    }
    assert(app2.data == "123456789012345");

    /* joinAppend, with both character and string delimiters. */
    auto app1b = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
        ostream.append("appenderB: ");
        ostream.joinAppend(["a", "bc", "def"], '-');
        ostream.append(':');
        ostream.joinAppend(["g", "hi", "jkl"], '-');
        ostream.appendln("*100*");
        ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        ostream.append(' ');
        ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        ostream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Use as an output range. A BufferedOutputRange passed by ref is not flushed when
     * the called function exits. One passed by value is flushed when the function
     * returns. Both the UFCS and non-UFCS call styles are exercised.
     */

    void outputStuffAsRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    void outputStuffAsVal(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        outputStuffAsRef(ostream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        outputStuffAsRef(ostream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Max size handling: flush size 5, nothing reserved, max size 10. */
    auto app5 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10);
        assert(app5.data.length == 0);

        /* No newline and below max size, so nothing is written yet. */
        ostream.append("1234567");
        assert(app5.data.length == 0);

        /* Max size reached, so the buffer is written. */
        ostream.append("89012");
        assert(app5.data == "123456789012");

        ostream.put("1234567");     // Not flushed yet (no newline).
assert(app5.data == "123456789012");

        ostream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}

/**
bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
reading a large buffer from the input stream rather than just a single line.

The file argument needs to be a File object open for reading, typically a filesystem
file or standard input. Use the Yes.keepTerminator template parameter to keep the
newline. This is similar to stdio.File.byLine, except specified as a template parameter
rather than a runtime parameter.

Reading in blocks does mean that input is not read until a full buffer is available or
end-of-file is reached. For this reason, bufferedByLine is not appropriate for
interactive input.

Template parameters:
$(LIST
    * `keepTerminator` - Whether the line terminator is included in each line returned.
    * `Char` - Element type of the lines returned, either char or ubyte.
    * `terminator` - The byte that ends a line.
    * `readSize` - Number of bytes requested from the file on each read.
    * `growSize` - Increment used when the buffer needs to grow. Must be greater than
      zero and no larger than readSize.
)

The lines returned by front are slices of an internal buffer that is reused. They are
valid only until the next call to popFront.

If the file passed in is already at end-of-file, the range returned is empty.
*/

auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer state variables
         *   - _buffer.length - Full length of allocated buffer.
         *   - _dataEnd - End of currently valid data (end of last read).
         *   - _lineStart - Start of current line.
         *   - _lineEnd - End of current line.
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        this (File f)
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        /* The range is empty when the file is at end-of-file and all buffered data has
         * been consumed.
         */
        bool empty() const
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        /* The current line, as a slice of the internal buffer. */
        Char[] front()
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                return cast(Char[]) _buffer[_lineStart .. _lineEnd];
            }
            else
            {
                /* The last line of a file may not have a terminator. */
                assert(_lineStart < _lineEnd);
                immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
                return cast(Char[]) _buffer[_lineStart .. end];
            }
        }

        /* Note: Call popFront at initialization to do the initial read. */
        void popFront()
        {
            import std.algorithm: copy, find;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Pop the current line. */
            _lineStart = _lineEnd;

            /* Set up the next line if more data is available, either in the buffer or
             * the file. The next line ends at the next newline, if there is one.
             *
             * Notes:
             * - 'find' returns the slice starting with the character searched for, or
             *   an empty range if not found.
             * - _lineEnd is set to _dataEnd both when the current buffer does not have
             *   a newline and when it ends with one.
             */
            auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
            _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

            if (found.empty && !_file.eof)
            {
                /* No newline in current buffer. Read from the file until the next
                 * newline is found.
                 */
                assert(_lineEnd == _dataEnd);

                if (_lineStart > 0)
                {
                    /* Move remaining data to the start of the buffer. */
                    immutable remainingLength = _dataEnd - _lineStart;
                    copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
                    _lineStart = 0;
                    _lineEnd = _dataEnd = remainingLength;
                }

                do
                {
                    /* Grow the buffer if necessary. */
                    immutable availableSize = _buffer.length - _dataEnd;
                    if (availableSize < readSize)
                    {
                        size_t growBy = growSize;
                        while (availableSize + growBy < readSize) growBy += growSize;
                        _buffer.length += growBy;
                    }

                    /* Read the next block. */
                    _dataEnd +=
                        _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
                        .length;

                    /* Only the newly read data needs to be searched. */
                    found = _buffer[_lineEnd .. _dataEnd].find(terminator);
                    _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

                } while (found.empty && !_file.eof);
            }
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    auto r = new BufferedByLineImpl(file);

    /* Prime the range with the first line. A file already at end-of-file produces an
     * empty range; popFront must not be called in that case, as it requires !empty.
     */
    if (!r.empty) r.popFront;
    return r;
}

unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {

        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters.
*/

    /* Reads bufferedPath with bufferedByLine and byLinePath with File.byLine, using the
     * same terminator policy for both, and checks that the two produce identical lines
     * and the same number of lines.
     */
    static void assertSameLines(KeepTerminator keep, size_t readSize, size_t growSize)
        (string bufferedPath, string byLinePath)
    {
        auto bufferedIn = bufferedPath.File().bufferedByLine!(keep, char, '\n', readSize, growSize);
        auto byLineIn = byLinePath.File().byLine(keep);
        foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* The default read and grow sizes of bufferedByLine, spelled out. */
    assertSameLines!(No.keepTerminator, 1024 * 128, 1024 * 16)(file1a, file1b);
    assertSameLines!(Yes.keepTerminator, 1024 * 128, 1024 * 16)(file1a, file1b);

    /* Smaller read size. This will trigger buffer growth. */
    assertSameLines!(No.keepTerminator, 512, 256)(file1a, file1b);

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (keep; [No.keepTerminator, Yes.keepTerminator])
        {
            static foreach (growSize; 1 .. readSize + 1)
            {
                assertSameLines!(keep, readSize, growSize)(file1a, file1b);
            }
        }
    }


    /* Files that do not end in a newline. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    string file4a = buildPath(testDir, "file4a.txt");
    string file4b = buildPath(testDir, "file4b.txt");
    {
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (keep; [No.keepTerminator, Yes.keepTerminator])
        {
            static foreach (growSize; 1 .. readSize + 1)
            {
                assertSameLines!(keep, readSize, growSize)(file1a, file1b);
                assertSameLines!(keep, readSize, growSize)(file2a, file2b);
                assertSameLines!(keep, readSize, growSize)(file3a, file3b);
            }
        }
    }
}

/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

Note: The main uses of joinAppend have been replaced by BufferedOutputRange, which has
its own joinAppend method.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, '\t').data);
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

The input range may hold either single elements of type E or arrays of E. The delimiter
is written between consecutive elements, never before the first or after the last. The
output range is not cleared first, results are appended to whatever it already holds.
The output range is returned to allow call chaining.
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
   )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }

    return outputRange;
}

@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* The input must be left untouched by the join. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}

/**
getTsvFieldValue extracts the value of a single field from a delimited text string.

This is a convenience function intended for cases when only a single field from an
input line is needed. If multiple values are needed, it will be more efficient to
work directly with std.algorithm.splitter or the InputFieldReordering class.

The input text is split by a delimiter character. The specified field is converted
to the desired type and the value returned.

An exception is thrown if there are not enough fields on the line or if conversion
fails. Conversion is done with std.conv.to, it throws a std.conv.ConvException on
failure. If not enough fields, the exception text is generated referencing 1-upped
field numbers as would be provided by command line users.
*/
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim) pure @safe
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step over the fields preceding the requested one, counting how many were seen. */
    size_t fieldsSeen = 0;
    for (; fieldsSeen < fieldIndex && !fields.empty; ++fieldsSeen)
    {
        fields.popFront;
    }

    /* Normal case: the requested field exists. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, fieldsSeen));
    }

    /* This is a workaround to a splitter special case - If the input is empty,
     * the returned split range is empty. This doesn't properly represent a single
     * column file. More correct mathematically, and for this case, would be a
     * single value representing an empty string. The input line is a convenient
     * source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}

unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}

/**
Field-lists - A field-list is a string entered on the command line identifying one or more
field numbers. They are used by the majority of the tsv utility applications. There are
two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
will use makeFieldListOptionHandler, it creates a delegate that can be passed to
std.getopt to process the command option. Actual processing of the option text is done by
parseFieldList. It can be called directly when the text of the option value contains more
than just the field number.

Syntax and behavior:

A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1-upped integers representing locations in an input line, in the traditional meaning of
Unix command line tools. Fields can be entered as single numbers or a range. Multiple
entries are separated by commas. Some examples (with 'fields' as the command line option):

   --fields 3                 // Single field
   --fields 4,1               // Two fields
   --fields 3-9               // A range, fields 3 to 9 inclusive
   --fields 1,2,7-34,11       // A mix of ranges and fields
   --fields 15-5,3-1          // Two ranges in reverse order.

Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
value by default, but can be enabled to support the notion of zero as representing the
entire line. However, zero cannot be part of a range. Field numbers are one-based by
default, but can be converted to zero-based. If conversion to zero-based is enabled, field
number zero must be disallowed or a signed integer type specified for the returned range.

An error is thrown if an invalid field specification is encountered. Error text is
intended for display. Error conditions include:
  - Empty fields list
  - Empty value, e.g. two consecutive commas, a trailing comma, or a leading comma
  - String that does not parse as a valid integer
  - Negative integers, or zero if zero is disallowed.
  - An incomplete range
  - Zero used as part of a range.

No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
field numbers can be entered as well. Additional restrictions need to be applied by the
caller.

Notes:
  - The data type determines the max field number that can be entered. Enabling conversion
    to zero-based restricts to the signed version of the data type.
  - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
    allowFieldNumZero template parameters.
*/

/** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
 *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
 */
alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";

/** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
 *  used by makeFieldListOptionHandler and parseFieldList.
*/
alias AllowFieldNumZero = Flag!"allowFieldNumZero";

alias OptionHandlerDelegate = void delegate(string option, string value);

/**
makeFieldListOptionHandler creates a std.getopt option handler for processing field lists
entered on the command line. A field list is as defined by parseFieldList.

Each invocation of the returned delegate parses the option value with parseFieldList and
appends the resulting field numbers to fieldsArray, so an option given several times on
the command line accumulates. If parsing throws, the exception message is prefixed with
the option text passed to the delegate and the exception is rethrown. Field numbers
appended before the failure remain in the array.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
    T,
    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    void appendParsedFields(ref T[] fieldArray, string option, string value)
    {
        try
        {
            foreach (fieldNum; value.parseFieldList!(T, convertToZero, allowZero))
            {
                fieldArray ~= fieldNum;
            }
        }
        catch (Exception exc)
        {
            import std.format : format;

            /* Identify the offending option in the message shown to the user. */
            exc.msg = format("[--%s] %s", option, exc.msg);
            throw exc;
        }
    }

    return (option, value) => appendParsedFields(fieldsArray, option, value);
}

unittest
{
    import std.exception : assertThrown, assertNotThrown;
    import std.getopt;

    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];

        /* arraySep is global std.getopt state. Restore it so later tests are unaffected. */
        auto savedArraySep = std.getopt.arraySep;
        scope(exit) std.getopt.arraySep = savedArraySep;
        std.getopt.arraySep = ",";

        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    {
        uint[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }
    {
        ushort[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }

    /* Basic cases involving signed types. */
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        long[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        int[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        short[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }

    {
        /* Error cases. */
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "-1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "--fields", "1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "a"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "1.5"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "2-"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "3,5,-7"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "3,5,"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "-1"];
        assertThrown(getopt(args,
                            "f|fields", fields.makeFieldListOptionHandler!(
                                size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
    }
}

/**
parseFieldList lazily generates a range of field numbers from a 'field-list' string.
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
(string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    /* Entries of the field-list not yet expanded. */
    auto pendingEntries = fieldList.splitter(delim);

    /* The first entry is expanded immediately. An empty field-list is passed on as
     * an empty string so that parseFieldRange reports the error.
     */
    string firstEntry = "";
    if (!pendingEntries.empty)
    {
        firstEntry = pendingEntries.front;
        pendingEntries.popFront;
    }

    /* Field numbers of the entry currently being iterated. */
    auto currentEntry = firstEntry.parseFieldRange!(T, convertToZero, allowZero);

    /* The range state lives in the enclosing function's variables, not in the struct. */
    struct Result
    {
        @property bool empty() { return currentEntry.empty; }

        @property T front()
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");
            assert(!currentEntry.empty, "Internal error. Call to front with an empty _currFieldParse.");

            return currentEntry.front.to!T;
        }

        void popFront()
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            currentEntry.popFront;

            /* Stay on the current entry until exhausted, then expand the next one, if any. */
            if (!currentEntry.empty || pendingEntries.empty) return;

            currentEntry = pendingEntries.front.parseFieldRange!(T, convertToZero, allowZero);
            pendingEntries.popFront;
        }
    }

    return Result();
}

unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic tests. */
    assert("1".parseFieldList.equal([1]));
    assert("1,2".parseFieldList.equal([1, 2]));
    assert("1,2,3".parseFieldList.equal([1, 2, 3]));
    assert("1-2".parseFieldList.equal([1, 2]));
    assert("1-2,6-4".parseFieldList.equal([1, 2, 6, 5, 4]));
    assert("1-2,1,1-2,2,2-1".parseFieldList.equal([1, 2, 1, 1, 2, 2, 2, 1]));
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    assert("1".parseFieldList!int.equal([1]));
    assert("1,2,3".parseFieldList!int.equal([1, 2, 3]));
    assert("1-2".parseFieldList!int.equal([1, 2]));
    assert("1-2,6-4".parseFieldList!int.equal([1, 2, 6, 5, 4]));
    assert("1-2,5".parseFieldList!int.equal([1, 2, 5]));

    /* Convert to zero tests */
    assert("1".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    assert("1".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    /* Allow zero tests. */
    assert("0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, -1, 2]));
    assert("1-2,5".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, 1, 4]));

    /* Error cases. */
    assertThrown("".parseFieldList.each);
    assertThrown(" ".parseFieldList.each);
    assertThrown(",".parseFieldList.each);
    assertThrown("5 6".parseFieldList.each);
    assertThrown(",7".parseFieldList.each);
    assertThrown("8,".parseFieldList.each);
    assertThrown("8,9,".parseFieldList.each);
    assertThrown("10,,11".parseFieldList.each);
    assertThrown("".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2-3,".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("2-,4".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2,3,,4".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(",7".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("10,0,,11".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,9,".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);

    assertThrown("0".parseFieldList.each);
    assertThrown("1,0,3".parseFieldList.each);
    assertThrown("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}

/* parseFieldRange parses a single number or number range. E.g. '5' or '5-8'. These are
 * the values in a field-list separated by a comma or other delimiter.
It returns a range that iterates over all the values in the range.
 *
 * A range may be ascending ('3-5') or descending ('5-3'), both ends are inclusive.
 * If convertToZero is set, each value is decremented by one after validation.
 *
 * An Exception is thrown for an empty string, an incomplete range ('5-' or '-5'), a
 * range with zero as either end when zero is allowed, a negative value, or a zero
 * value when zero is not allowed. Values are parsed with std.conv.to, which throws
 * a ConvException on text that is not a valid integer of the target type.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
(string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    auto rangeSplit = findSplit(fieldRange, "-");
    immutable bool isRange = !rangeSplit[1].empty;

    if (isRange && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = isRange ? rangeSplit[2].to!S : start;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero stands for the entire line, so it is invalid at either end of a range. */
        if (isRange && (start == 0 || last == 0))
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }

        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* iota excludes its end value, so step one past 'last' to make the range inclusive.
     * NOTE(review): 'last + increment' looks like it can wrap when 'last' is the maximum
     * value of a 32 or 64 bit type. Presumably unreachable for real field numbers - confirm.
     */
    return iota(start, last + increment, increment);
}

unittest // parseFieldRange
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic cases */
    assert(parseFieldRange("1").equal([1]));
    assert("2".parseFieldRange.equal([2]));
    assert("3-4".parseFieldRange.equal([3, 4]));
    assert("3-5".parseFieldRange.equal([3, 4, 5]));
    assert("4-3".parseFieldRange.equal([4, 3]));
    assert("10-1".parseFieldRange.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Convert to zero-based indices */
    assert(parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)("1").equal([0]));
    assert("2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([1]));
    assert("3-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3]));
    assert("3-5".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3, 4]));
    assert("4-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([3, 2]));
    assert("10-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]));

    /* Allow zero. */
    assert("0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert(parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([1]));
    assert("3-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([3, 4]));
    assert("10-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Allow zero, but never as either end of a range. */
    assertThrown("0-6".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("6-0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("6-0".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));

    /* Allow zero, convert to zero-based index.
*/ 1738 assert("0".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1739 assert(parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([0])); 1740 assert("3-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([2, 3])); 1741 assert("10-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])); 1742 1743 /* Alternate integer types. */ 1744 assert("2".parseFieldRange!uint.equal([2])); 1745 assert("3-5".parseFieldRange!uint.equal([3, 4, 5])); 1746 assert("10-1".parseFieldRange!uint.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1747 assert("2".parseFieldRange!int.equal([2])); 1748 assert("3-5".parseFieldRange!int.equal([3, 4, 5])); 1749 assert("10-1".parseFieldRange!int.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1750 assert("2".parseFieldRange!ushort.equal([2])); 1751 assert("3-5".parseFieldRange!ushort.equal([3, 4, 5])); 1752 assert("10-1".parseFieldRange!ushort.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1753 assert("2".parseFieldRange!short.equal([2])); 1754 assert("3-5".parseFieldRange!short.equal([3, 4, 5])); 1755 assert("10-1".parseFieldRange!short.equal([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])); 1756 1757 assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1758 assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1759 assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1760 assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1761 assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0])); 1762 assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1763 assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1])); 1764 
1765 /* Max field value cases. */ 1766 assert("65535".parseFieldRange!ushort.equal([65535])); // ushort max 1767 assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535])); 1768 assert("32767".parseFieldRange!short.equal([32767])); // short max 1769 assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767])); 1770 assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766])); 1771 1772 /* Error cases. */ 1773 assertThrown("".parseFieldRange); 1774 assertThrown(" ".parseFieldRange); 1775 assertThrown("-".parseFieldRange); 1776 assertThrown(" -".parseFieldRange); 1777 assertThrown("- ".parseFieldRange); 1778 assertThrown("1-".parseFieldRange); 1779 assertThrown("-2".parseFieldRange); 1780 assertThrown("-1".parseFieldRange); 1781 assertThrown("1.0".parseFieldRange); 1782 assertThrown("0".parseFieldRange); 1783 assertThrown("0-3".parseFieldRange); 1784 assertThrown("-2-4".parseFieldRange); 1785 assertThrown("2--4".parseFieldRange); 1786 assertThrown("2-".parseFieldRange); 1787 assertThrown("a".parseFieldRange); 1788 assertThrown("0x3".parseFieldRange); 1789 assertThrown("3U".parseFieldRange); 1790 assertThrown("1_000".parseFieldRange); 1791 assertThrown(".".parseFieldRange); 1792 1793 assertThrown("".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1794 assertThrown(" ".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1795 assertThrown("-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1796 assertThrown("1-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1797 assertThrown("-2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1798 assertThrown("-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1799 assertThrown("0".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1800 assertThrown("0-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1801 assertThrown("-2-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1802 
assertThrown("2--4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)); 1803 1804 assertThrown("".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1805 assertThrown(" ".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1806 assertThrown("-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1807 assertThrown("1-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1808 assertThrown("-2".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1809 assertThrown("-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1810 assertThrown("0-3".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1811 assertThrown("-2-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1812 1813 assertThrown("".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1814 assertThrown(" ".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1815 assertThrown("-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1816 assertThrown("1-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1817 assertThrown("-2".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1818 assertThrown("-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1819 assertThrown("0-3".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1820 assertThrown("-2-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)); 1821 1822 /* Value out of range cases. */ 1823 assertThrown("65536".parseFieldRange!ushort); // One more than ushort max. 1824 assertThrown("65535-65536".parseFieldRange!ushort); 1825 assertThrown("32768".parseFieldRange!short); // One more than short max. 
1826 assertThrown("32765-32768".parseFieldRange!short); 1827 // Convert to zero limits signed range. 1828 assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex)); 1829 assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766])); 1830 } 1831 1832 /** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix. 1833 * A Yes value indicates the Unix newline was already removed, as might be done via 1834 * std.File.byLine or similar mechanism. 1835 */ 1836 alias NewlineWasRemoved = Flag!"newlineWasRemoved"; 1837 1838 /** 1839 throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS 1840 line ending is found on a build compiled for a Unix platform. This is used by 1841 the TSV Utilities to detect Window/DOS line endings and terminate processing 1842 with an error message to the user. 1843 */ 1844 void throwIfWindowsNewlineOnUnix 1845 (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved) 1846 (const char[] line, const char[] filename, size_t lineNum) 1847 { 1848 version(Posix) 1849 { 1850 static if (nlWasRemoved) 1851 { 1852 immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r'; 1853 } 1854 else 1855 { 1856 immutable bool hasWindowsLineEnding = 1857 line.length > 1 && 1858 line[$ - 2] == '\r' && 1859 line[$ - 1] == '\n'; 1860 } 1861 1862 if (hasWindowsLineEnding) 1863 { 1864 import std.format; 1865 throw new Exception( 1866 format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s", 1867 (filename == "-") ? "Standard Input" : filename, lineNum)); 1868 } 1869 } 1870 } 1871 1872 unittest 1873 { 1874 /* Note: Currently only building on Posix. Need to add non-Posix test cases 1875 * if Windows builds are ever done. 
1876 */ 1877 version(Posix) 1878 { 1879 import std.exception; 1880 1881 assertNotThrown(throwIfWindowsNewlineOnUnix("", "afile.tsv", 1)); 1882 assertNotThrown(throwIfWindowsNewlineOnUnix("a", "afile.tsv", 2)); 1883 assertNotThrown(throwIfWindowsNewlineOnUnix("ab", "afile.tsv", 3)); 1884 assertNotThrown(throwIfWindowsNewlineOnUnix("abc", "afile.tsv", 4)); 1885 1886 assertThrown(throwIfWindowsNewlineOnUnix("\r", "afile.tsv", 1)); 1887 assertThrown(throwIfWindowsNewlineOnUnix("a\r", "afile.tsv", 2)); 1888 assertThrown(throwIfWindowsNewlineOnUnix("ab\r", "afile.tsv", 3)); 1889 assertThrown(throwIfWindowsNewlineOnUnix("abc\r", "afile.tsv", 4)); 1890 1891 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\n", "afile.tsv", 1)); 1892 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\n", "afile.tsv", 2)); 1893 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3)); 1894 assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4)); 1895 1896 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5)); 1897 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6)); 1898 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7)); 1899 assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8)); 1900 1901 /* Standard Input formatting. 
*/ 1902 import std.algorithm : endsWith; 1903 bool exceptionCaught = false; 1904 1905 try (throwIfWindowsNewlineOnUnix("\r", "-", 99)); 1906 catch (Exception e) 1907 { 1908 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1909 exceptionCaught = true; 1910 } 1911 finally 1912 { 1913 assert(exceptionCaught); 1914 exceptionCaught = false; 1915 } 1916 1917 try (throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99)); 1918 catch (Exception e) 1919 { 1920 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1921 exceptionCaught = true; 1922 } 1923 finally 1924 { 1925 assert(exceptionCaught); 1926 exceptionCaught = false; 1927 } 1928 } 1929 }