1 /** 2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange, 3 and a several others. 4 5 Utilities in this file: 6 $(LIST 7 * [InputFieldReordering] - A class that creates a reordered subset of fields from 8 an input line. Fields in the subset are accessed by array indicies. This is 9 especially useful when processing the subset in a specific order, such as the 10 order listed on the command-line at run-time. 11 12 * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer 13 output. Intended for use with stdout, it is a significant performance benefit. 14 15 * [isFlushableOutputRange] - Tests if something is an OutputRange with a flush 16 member. 17 18 * [bufferedByLine] - An input range that reads from a File handle line by line. 19 It is similar to the standard library method std.stdio.File.byLine, but quite a 20 bit faster. This is achieved by reading in larger blocks and buffering. 21 22 * [InputSourceRange] - An input range that provides open file access to a set of 23 files. It is used to iterate over files passed as command line arguments. This 24 enable reading header line of a file during command line argument process, then 25 passing the open file to the main processing functions. 26 27 * [ByLineSourceRange] - Similar to an InputSourceRange, except that it provides 28 access to a byLine iterator (bufferedByLine) rather than an open file. This is 29 used by tools that run the same processing logic both header non-header lines. 30 31 * [isBufferableInputSource] - Tests if a file or input range can be read in a 32 buffered fashion by inputSourceByChunk. 33 34 * [inputSourceByChunk] - Returns a range that reads from a file handle (File) or 35 a ubyte input range a chunk at a time. 36 37 * [joinAppend] - A function that performs a join, but appending the join output to 38 an output stream. It is a performance improvement over using join or joiner with 39 writeln. 40 41 * [getTsvFieldValue] - A convenience function when only a single value is needed 42 from an input line. 43 44 * [throwIfWindowsNewline] - A utility for detecting Windows newlines in input. 45 ) 46 47 Copyright (c) 2015-2020, eBay Inc. 48 Initially written by Jon Degenhardt 49 50 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt) 51 */ 52 53 module tsv_utils.common.utils; 54 55 import std.range; 56 import std.stdio : File, isFileHandle, KeepTerminator; 57 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned, ReturnType, Unqual; 58 import std.typecons : Flag, No, Yes; 59 60 // InputFieldReording class. 61 62 /** Flag used by the InputFieldReordering template. */ 63 alias EnablePartialLines = Flag!"enablePartialLines"; 64 65 /** 66 InputFieldReordering - Move select fields from an input line to an output array, 67 reordering along the way. 68 69 The InputFieldReordering class is used to reorder a subset of fields from an input line. 70 The caller instantiates an InputFieldReordering object at the start of input processing. 71 The instance contains a mapping from input index to output index, plus a buffer holding 72 the reordered fields. The caller processes each input line by calling initNewLine, 73 splitting the line into fields, and calling processNextField on each field. The output 74 buffer is ready when the allFieldsFilled method returns true. 75 76 Fields are not copied, instead the output buffer points to the fields passed by the caller. 77 The caller needs to use or copy the output buffer while the fields are still valid, which 78 is normally until reading the next input line. The program below illustrates the basic use 79 case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend, 80 below, which has a performance improvement over join used here.) 81 82 --- 83 int main(string[] args) 84 { 85 import tsv_utils.common.utils; 86 import std.algorithm, std.array, std.range, std.stdio; 87 size_t[] fieldIndicies = [3, 0, 2]; 88 auto fieldReordering = new InputFieldReordering!char(fieldIndicies); 89 foreach (line; stdin.byLine) 90 { 91 fieldReordering.initNewLine; 92 foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate) 93 { 94 fieldReordering.processNextField(fieldIndex, fieldValue); 95 if (fieldReordering.allFieldsFilled) break; 96 } 97 if (fieldReordering.allFieldsFilled) 98 { 99 writeln(fieldReordering.outputFields.join('\t')); 100 } 101 else 102 { 103 writeln("Error: Insufficient number of field on the line."); 104 } 105 } 106 return 0; 107 } 108 --- 109 110 Field indicies are zero-based. An individual field can be listed multiple times. The 111 outputFields array is not valid until all the specified fields have been processed. The 112 allFieldsFilled method tests this. If a line does not have enough fields the outputFields 113 buffer cannot be used. For most TSV applications this is okay, as it means the line is 114 invalid and cannot be used. However, if partial lines are okay, the template can be 115 instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in 116 are empty strings in the outputFields return. 117 */ 118 final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no) 119 if (isSomeChar!C) 120 { 121 /* Implementation: The class works by creating an array of tuples mapping the input 122 * field index to the location in the outputFields array. The 'fromToMap' array is 123 * sorted in input field order, enabling placement in the outputFields buffer during a 124 * pass over the input fields. The map is created by the constructor. An example: 125 * 126 * inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9] 127 * fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>] 128 * 129 * During processing of an a line, an array slice, mapStack, is used to track how 130 * much of the fromToMap remains to be processed. 131 */ 132 import std.typecons : Tuple; 133 134 alias TupleFromTo = Tuple!(size_t, "from", size_t, "to"); 135 136 private C[][] outputFieldsBuf; 137 private TupleFromTo[] fromToMap; 138 private TupleFromTo[] mapStack; 139 140 final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe 141 { 142 import std.algorithm : sort; 143 144 outputFieldsBuf = new C[][](inputFieldIndicies.length); 145 fromToMap.reserve(inputFieldIndicies.length); 146 147 foreach (to, from; inputFieldIndicies.enumerate(start)) 148 { 149 fromToMap ~= TupleFromTo(from, to); 150 } 151 152 sort(fromToMap); 153 initNewLine; 154 } 155 156 /** initNewLine initializes the object for a new line. */ 157 final void initNewLine() pure nothrow @safe 158 { 159 mapStack = fromToMap; 160 static if (partialLinesOk) 161 { 162 import std.algorithm : each; 163 outputFieldsBuf.each!((ref s) => s.length = 0); 164 } 165 } 166 167 /** processNextField maps an input field to the correct locations in the 168 * outputFields array. 169 * 170 * processNextField should be called once for each field on the line, in the order 171 * found. The processing of the line can terminate once allFieldsFilled returns 172 * true. 173 * 174 * The return value is the number of output fields the input field maps to. Zero 175 * means the field is not mapped to the output fields array. 176 * 177 * If, prior to allFieldsProcessed returning true, any fields on the input line 178 * are not passed to processNextField, the caller should either ensure the fields 179 * are not part of the output fields or have partial lines enabled. 180 */ 181 final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc 182 { 183 size_t numFilled = 0; 184 while (!mapStack.empty && fieldIndex == mapStack.front.from) 185 { 186 outputFieldsBuf[mapStack.front.to] = fieldValue; 187 mapStack.popFront; 188 numFilled++; 189 } 190 return numFilled; 191 } 192 193 /** allFieldsFilled returned true if all fields expected have been processed. */ 194 final bool allFieldsFilled() const pure nothrow @safe @nogc 195 { 196 return mapStack.empty; 197 } 198 199 /** outputFields is the assembled output fields. Unless partial lines are enabled, 200 * it is only valid after allFieldsFilled is true. 201 */ 202 final C[][] outputFields() pure nothrow @safe @nogc 203 { 204 return outputFieldsBuf[]; 205 } 206 } 207 208 // InputFieldReordering - Tests using different character types. 209 @safe unittest 210 { 211 import std.conv : to; 212 213 auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"], 214 ["r2f0", "abc", "ÀBCßßZ", "ghi"], 215 ["r3f0", "123", "456", "789"]]; 216 217 size_t[] fields_2_0 = [2, 0]; 218 219 auto expected_2_0 = [["r1f2", "r1f0"], 220 ["ÀBCßßZ", "r2f0"], 221 ["456", "r3f0"]]; 222 223 char[][][] charExpected_2_0 = to!(char[][][])(expected_2_0); 224 wchar[][][] wcharExpected_2_0 = to!(wchar[][][])(expected_2_0); 225 dchar[][][] dcharExpected_2_0 = to!(dchar[][][])(expected_2_0); 226 dstring[][] dstringExpected_2_0 = to!(dstring[][])(expected_2_0); 227 228 auto charIFR = new InputFieldReordering!char(fields_2_0); 229 auto wcharIFR = new InputFieldReordering!wchar(fields_2_0); 230 auto dcharIFR = new InputFieldReordering!dchar(fields_2_0); 231 232 foreach (lineIndex, line; inputLines) 233 { 234 charIFR.initNewLine; 235 wcharIFR.initNewLine; 236 dcharIFR.initNewLine; 237 238 foreach (fieldIndex, fieldValue; line) 239 { 240 charIFR.processNextField(fieldIndex, to!(char[])(fieldValue)); 241 wcharIFR.processNextField(fieldIndex, to!(wchar[])(fieldValue)); 242 dcharIFR.processNextField(fieldIndex, to!(dchar[])(fieldValue)); 243 244 assert ((fieldIndex >= 2) == charIFR.allFieldsFilled); 245 assert ((fieldIndex >= 2) == wcharIFR.allFieldsFilled); 246 assert ((fieldIndex >= 2) == dcharIFR.allFieldsFilled); 247 } 248 assert(charIFR.allFieldsFilled); 249 assert(wcharIFR.allFieldsFilled); 250 assert(dcharIFR.allFieldsFilled); 251 252 assert(charIFR.outputFields == charExpected_2_0[lineIndex]); 253 assert(wcharIFR.outputFields == wcharExpected_2_0[lineIndex]); 254 assert(dcharIFR.outputFields == dcharExpected_2_0[lineIndex]); 255 } 256 } 257 258 // InputFieldReordering - Test of partial line support. 259 @safe unittest 260 { 261 import std.conv : to; 262 263 auto inputLines = [["r1f0", "r1f1", "r1f2", "r1f3"], 264 ["r2f0", "abc", "ÀBCßßZ", "ghi"], 265 ["r3f0", "123", "456", "789"]]; 266 267 size_t[] fields_2_0 = [2, 0]; 268 269 // The expected states of the output field while each line and field are processed. 270 auto expectedBylineByfield_2_0 = 271 [ 272 [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"], ["r1f2", "r1f0"]], 273 [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]], 274 [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"], ["456", "r3f0"]], 275 ]; 276 277 char[][][][] charExpectedBylineByfield_2_0 = to!(char[][][][])(expectedBylineByfield_2_0); 278 279 auto charIFR = new InputFieldReordering!(char, EnablePartialLines.yes)(fields_2_0); 280 281 foreach (lineIndex, line; inputLines) 282 { 283 charIFR.initNewLine; 284 foreach (fieldIndex, fieldValue; line) 285 { 286 charIFR.processNextField(fieldIndex, to!(char[])(fieldValue)); 287 assert(charIFR.outputFields == charExpectedBylineByfield_2_0[lineIndex][fieldIndex]); 288 } 289 } 290 } 291 292 // InputFieldReordering - Field combination tests. 293 @safe unittest 294 { 295 import std.conv : to; 296 import std.stdio; 297 298 auto inputLines = [["00", "01", "02", "03"], 299 ["10", "11", "12", "13"], 300 ["20", "21", "22", "23"]]; 301 302 size_t[] fields_0 = [0]; 303 size_t[] fields_3 = [3]; 304 size_t[] fields_01 = [0, 1]; 305 size_t[] fields_10 = [1, 0]; 306 size_t[] fields_03 = [0, 3]; 307 size_t[] fields_30 = [3, 0]; 308 size_t[] fields_0123 = [0, 1, 2, 3]; 309 size_t[] fields_3210 = [3, 2, 1, 0]; 310 size_t[] fields_03001 = [0, 3, 0, 0, 1]; 311 312 auto expected_0 = to!(char[][][])([["00"], 313 ["10"], 314 ["20"]]); 315 316 auto expected_3 = to!(char[][][])([["03"], 317 ["13"], 318 ["23"]]); 319 320 auto expected_01 = to!(char[][][])([["00", "01"], 321 ["10", "11"], 322 ["20", "21"]]); 323 324 auto expected_10 = to!(char[][][])([["01", "00"], 325 ["11", "10"], 326 ["21", "20"]]); 327 328 auto expected_03 = to!(char[][][])([["00", "03"], 329 ["10", "13"], 330 ["20", "23"]]); 331 332 auto expected_30 = to!(char[][][])([["03", "00"], 333 ["13", "10"], 334 ["23", "20"]]); 335 336 auto expected_0123 = to!(char[][][])([["00", "01", "02", "03"], 337 ["10", "11", "12", "13"], 338 ["20", "21", "22", "23"]]); 339 340 auto expected_3210 = to!(char[][][])([["03", "02", "01", "00"], 341 ["13", "12", "11", "10"], 342 ["23", "22", "21", "20"]]); 343 344 auto expected_03001 = to!(char[][][])([["00", "03", "00", "00", "01"], 345 ["10", "13", "10", "10", "11"], 346 ["20", "23", "20", "20", "21"]]); 347 348 auto ifr_0 = new InputFieldReordering!char(fields_0); 349 auto ifr_3 = new InputFieldReordering!char(fields_3); 350 auto ifr_01 = new InputFieldReordering!char(fields_01); 351 auto ifr_10 = new InputFieldReordering!char(fields_10); 352 auto ifr_03 = new InputFieldReordering!char(fields_03); 353 auto ifr_30 = new InputFieldReordering!char(fields_30); 354 auto ifr_0123 = new InputFieldReordering!char(fields_0123); 355 auto ifr_3210 = new InputFieldReordering!char(fields_3210); 356 auto ifr_03001 = new InputFieldReordering!char(fields_03001); 357 358 foreach (lineIndex, line; inputLines) 359 { 360 ifr_0.initNewLine; 361 ifr_3.initNewLine; 362 ifr_01.initNewLine; 363 ifr_10.initNewLine; 364 ifr_03.initNewLine; 365 ifr_30.initNewLine; 366 ifr_0123.initNewLine; 367 ifr_3210.initNewLine; 368 ifr_03001.initNewLine; 369 370 foreach (fieldIndex, fieldValue; line) 371 { 372 ifr_0.processNextField(fieldIndex, to!(char[])(fieldValue)); 373 ifr_3.processNextField(fieldIndex, to!(char[])(fieldValue)); 374 ifr_01.processNextField(fieldIndex, to!(char[])(fieldValue)); 375 ifr_10.processNextField(fieldIndex, to!(char[])(fieldValue)); 376 ifr_03.processNextField(fieldIndex, to!(char[])(fieldValue)); 377 ifr_30.processNextField(fieldIndex, to!(char[])(fieldValue)); 378 ifr_0123.processNextField(fieldIndex, to!(char[])(fieldValue)); 379 ifr_3210.processNextField(fieldIndex, to!(char[])(fieldValue)); 380 ifr_03001.processNextField(fieldIndex, to!(char[])(fieldValue)); 381 } 382 383 assert(ifr_0.outputFields == expected_0[lineIndex]); 384 assert(ifr_3.outputFields == expected_3[lineIndex]); 385 assert(ifr_01.outputFields == expected_01[lineIndex]); 386 assert(ifr_10.outputFields == expected_10[lineIndex]); 387 assert(ifr_03.outputFields == expected_03[lineIndex]); 388 assert(ifr_30.outputFields == expected_30[lineIndex]); 389 assert(ifr_0123.outputFields == expected_0123[lineIndex]); 390 assert(ifr_3210.outputFields == expected_3210[lineIndex]); 391 assert(ifr_03001.outputFields == expected_03001[lineIndex]); 392 } 393 } 394 395 /** 396 BufferedOutputRange is a performance enhancement over writing directly to an output 397 stream. It holds a File open for write or an OutputRange. Ouput is accumulated in an 398 internal buffer and written to the output stream as a block. 399 400 Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster 401 than writing to stdout directly. This is especially noticable for outputs with short 402 lines, as it blocks many writes together in a single write. 403 404 The internal buffer is written to the output stream after flushSize has been reached. 405 This is checked at newline boundaries, when appendln is called or when put is called 406 with a single newline character. Other writes check maxSize, which is used to avoid 407 runaway buffers. 408 409 BufferedOutputRange has a put method allowing it to be used a range. It has a number 410 of other methods providing additional control. 411 412 $(LIST 413 * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the 414 output stream, e.g. stdout. Other arguments are optional, defaults normally suffice. 415 416 * `append(stuff)` - Append to the internal buffer. 417 418 * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer 419 is flushed to the output stream if is has reached flushSize. 420 421 * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the 422 output stream if is has reached flushSize. 423 424 * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`. 425 For reasons that are not clear, joiner is quite slow. 426 427 * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been 428 reached. 429 430 * `flush()` - Write the internal buffer to the output stream. 431 432 * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single 433 newline character, '\n' or "\n". 434 ) 435 436 The internal buffer is automatically flushed when the BufferedOutputRange goes out of 437 scope. 438 */ 439 struct BufferedOutputRange(OutputTarget) 440 if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char)) 441 { 442 import std.array : appender; 443 import std.format : format; 444 445 /* Identify the output element type. Only supporting char and ubyte for now. */ 446 static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char)) 447 { 448 alias C = char; 449 } 450 else static if (isOutputRange!(OutputTarget, ubyte)) 451 { 452 alias C = ubyte; 453 } 454 else static assert(false); 455 456 private enum defaultReserveSize = 11264; 457 private enum defaultFlushSize = 10240; 458 private enum defaultMaxSize = 4194304; 459 460 private OutputTarget _outputTarget; 461 private auto _outputBuffer = appender!(C[]); 462 private immutable size_t _flushSize; 463 private immutable size_t _maxSize; 464 465 this(OutputTarget outputTarget, 466 size_t flushSize = defaultFlushSize, 467 size_t reserveSize = defaultReserveSize, 468 size_t maxSize = defaultMaxSize) 469 { 470 assert(flushSize <= maxSize); 471 472 _outputTarget = outputTarget; 473 _flushSize = flushSize; 474 _maxSize = (flushSize <= maxSize) ? maxSize : flushSize; 475 _outputBuffer.reserve(reserveSize); 476 } 477 478 ~this() 479 { 480 flush(); 481 } 482 483 void flush() 484 { 485 static if (isFileHandle!OutputTarget) _outputTarget.rawWrite(_outputBuffer.data); 486 else _outputTarget.put(_outputBuffer.data); 487 488 _outputBuffer.clear; 489 } 490 491 bool flushIfFull() 492 { 493 bool isFull = _outputBuffer.data.length >= _flushSize; 494 if (isFull) flush(); 495 return isFull; 496 } 497 498 /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */ 499 void flushIfMaxSize() 500 { 501 if (_outputBuffer.data.length >= _maxSize) flush(); 502 } 503 504 /* maybeFlush is intended for the case where put is called with a trailing newline. 505 * 506 * Flushing occurs if the buffer has a trailing newline and has reached flush size. 507 * Flushing also occurs if the buffer has reached max size. 508 */ 509 private bool maybeFlush() 510 { 511 immutable bool doFlush = 512 _outputBuffer.data.length >= _flushSize && 513 (_outputBuffer.data[$-1] == '\n' || _outputBuffer.data.length >= _maxSize); 514 515 if (doFlush) flush(); 516 return doFlush; 517 } 518 519 520 private void appendRaw(T)(T stuff) pure 521 { 522 import std.range : rangePut = put; 523 rangePut(_outputBuffer, stuff); 524 } 525 526 void append(T)(T stuff) 527 { 528 appendRaw(stuff); 529 maybeFlush(); 530 } 531 532 bool appendln() 533 { 534 appendRaw('\n'); 535 return flushIfFull(); 536 } 537 538 bool appendln(T)(T stuff) 539 { 540 appendRaw(stuff); 541 return appendln(); 542 } 543 544 /* joinAppend is an optimization of append(inputRange.joiner(delimiter). 545 * This form is quite a bit faster, 40%+ on some benchmarks. 546 */ 547 void joinAppend(InputRange, E)(InputRange inputRange, E delimiter) 548 if (isInputRange!InputRange && 549 is(ElementType!InputRange : const C[]) && 550 (is(E : const C[]) || is(E : const C))) 551 { 552 if (!inputRange.empty) 553 { 554 appendRaw(inputRange.front); 555 inputRange.popFront; 556 } 557 foreach (x; inputRange) 558 { 559 appendRaw(delimiter); 560 appendRaw(x); 561 } 562 flushIfMaxSize(); 563 } 564 565 /* Make this an output range. */ 566 void put(T)(T stuff) 567 { 568 import std.traits; 569 import std.stdio; 570 571 static if (isSomeChar!T) 572 { 573 if (stuff == '\n') appendln(); 574 else appendRaw(stuff); 575 } 576 else static if (isSomeString!T) 577 { 578 if (stuff == "\n") appendln(); 579 else append(stuff); 580 } 581 else append(stuff); 582 } 583 } 584 585 // BufferedOutputRange. 586 unittest 587 { 588 import tsv_utils.common.unittest_utils; 589 import std.file : rmdirRecurse, readText; 590 import std.path : buildPath; 591 592 auto testDir = makeUnittestTempDir("tsv_utils_buffered_output"); 593 scope(exit) testDir.rmdirRecurse; 594 595 import std.algorithm : map, joiner; 596 import std.range : iota; 597 import std.conv : to; 598 599 /* Basic test. Note that exiting the scope triggers flush. */ 600 string filepath1 = buildPath(testDir, "file1.txt"); 601 { 602 import std.stdio : File; 603 604 auto ostream = BufferedOutputRange!File(filepath1.File("wb")); 605 ostream.append("file1: "); 606 ostream.append("abc"); 607 ostream.append(["def", "ghi", "jkl"]); 608 ostream.appendln(100.to!string); 609 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 610 ostream.appendln(); 611 } 612 assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 613 614 /* Test with no reserve and no flush at every line. */ 615 string filepath2 = buildPath(testDir, "file2.txt"); 616 { 617 import std.stdio : File; 618 619 auto ostream = BufferedOutputRange!File(filepath2.File("wb"), 0, 0); 620 ostream.append("file2: "); 621 ostream.append("abc"); 622 ostream.append(["def", "ghi", "jkl"]); 623 ostream.appendln("100"); 624 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 625 ostream.appendln(); 626 } 627 assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 628 629 /* With a locking text writer. Requires version 2.078.0 630 See: https://issues.dlang.org/show_bug.cgi?id=9661 631 */ 632 static if (__VERSION__ >= 2078) 633 { 634 string filepath3 = buildPath(testDir, "file3.txt"); 635 { 636 import std.stdio : File; 637 638 auto ltw = filepath3.File("wb").lockingTextWriter; 639 { 640 auto ostream = BufferedOutputRange!(typeof(ltw))(ltw); 641 ostream.append("file3: "); 642 ostream.append("abc"); 643 ostream.append(["def", "ghi", "jkl"]); 644 ostream.appendln("100"); 645 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 646 ostream.appendln(); 647 } 648 } 649 assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 650 } 651 652 /* With an Appender. */ 653 import std.array : appender; 654 auto app1 = appender!(char[]); 655 { 656 auto ostream = BufferedOutputRange!(typeof(app1))(app1); 657 ostream.append("appender1: "); 658 ostream.append("abc"); 659 ostream.append(["def", "ghi", "jkl"]); 660 ostream.appendln("100"); 661 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" ")); 662 ostream.appendln(); 663 } 664 assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n"); 665 666 /* With an Appender, but checking flush boundaries. */ 667 auto app2 = appender!(char[]); 668 { 669 auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+ 670 bool wasFlushed = false; 671 672 assert(app2.data == ""); 673 674 ostream.append("12345678"); // Not flushed yet. 675 assert(app2.data == ""); 676 677 wasFlushed = ostream.appendln; // Nineth char, not flushed yet. 678 assert(!wasFlushed); 679 assert(app2.data == ""); 680 681 wasFlushed = ostream.appendln; // Tenth char, now flushed. 682 assert(wasFlushed); 683 assert(app2.data == "12345678\n\n"); 684 685 app2.clear; 686 assert(app2.data == ""); 687 688 ostream.append("12345678"); 689 690 wasFlushed = ostream.flushIfFull; 691 assert(!wasFlushed); 692 assert(app2.data == ""); 693 694 ostream.flush; 695 assert(app2.data == "12345678"); 696 697 app2.clear; 698 assert(app2.data == ""); 699 700 ostream.append("123456789012345"); 701 assert(app2.data == ""); 702 } 703 assert(app2.data == "123456789012345"); 704 705 /* Using joinAppend. */ 706 auto app1b = appender!(char[]); 707 { 708 auto ostream = BufferedOutputRange!(typeof(app1b))(app1b); 709 ostream.append("appenderB: "); 710 ostream.joinAppend(["a", "bc", "def"], '-'); 711 ostream.append(':'); 712 ostream.joinAppend(["g", "hi", "jkl"], '-'); 713 ostream.appendln("*100*"); 714 ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' '); 715 ostream.append(' '); 716 ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " "); 717 ostream.appendln(); 718 } 719 assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n", 720 "app1b.data: |" ~app1b.data ~ "|"); 721 722 /* Operating as an output range. When passed to a function as a ref, exiting 723 * the function does not flush. When passed as a value, it get flushed when 724 * the function returns. Also test both UCFS and non-UFCS styles. 725 */ 726 727 void outputStuffAsRef(T)(ref T range) 728 if (isOutputRange!(T, char)) 729 { 730 range.put('1'); 731 put(range, "23"); 732 range.put('\n'); 733 range.put(["5", "67"]); 734 put(range, iota(8, 10).map!(x => x.to!string)); 735 put(range, "\n"); 736 } 737 738 void outputStuffAsVal(T)(T range) 739 if (isOutputRange!(T, char)) 740 { 741 put(range, '1'); 742 range.put("23"); 743 put(range, '\n'); 744 put(range, ["5", "67"]); 745 range.put(iota(8, 10).map!(x => x.to!string)); 746 range.put("\n"); 747 } 748 749 auto app3 = appender!(char[]); 750 { 751 auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0); 752 outputStuffAsRef(ostream); 753 assert(app3.data == "", "app3.data: |" ~app3.data ~ "|"); 754 outputStuffAsRef(ostream); 755 assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|"); 756 } 757 assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|"); 758 759 auto app4 = appender!(char[]); 760 { 761 auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0); 762 outputStuffAsVal(ostream); 763 assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 764 outputStuffAsVal(ostream); 765 assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 766 } 767 assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|"); 768 769 /* Test maxSize. */ 770 auto app5 = appender!(char[]); 771 { 772 auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10 773 assert(app5.data == ""); 774 775 ostream.append("1234567"); // Not flushed yet (no newline). 776 assert(app5.data == ""); 777 778 ostream.append("89012"); // Flushed by maxSize 779 assert(app5.data == "123456789012"); 780 781 ostream.put("1234567"); // Not flushed yet (no newline). 782 assert(app5.data == "123456789012"); 783 784 ostream.put("89012"); // Flushed by maxSize 785 assert(app5.data == "123456789012123456789012"); 786 787 ostream.joinAppend(["ab", "cd"], '-'); // Not flushed yet 788 ostream.joinAppend(["de", "gh", "ij"], '-'); // Flushed by maxSize 789 assert(app5.data == "123456789012123456789012ab-cdde-gh-ij"); 790 } 791 assert(app5.data == "123456789012123456789012ab-cdde-gh-ij"); 792 } 793 794 /** 795 isFlushableOutputRange returns true if R is an output range with a flush member. 796 */ 797 enum bool isFlushableOutputRange(R, E=char) = isOutputRange!(R, E) 798 && is(ReturnType!((R r) => r.flush) == void); 799 800 @safe unittest 801 { 802 import std.array; 803 auto app = appender!(char[]); 804 auto ostream = BufferedOutputRange!(typeof(app))(app, 5, 0, 10); // maxSize 10 805 806 static assert(isOutputRange!(typeof(app), char)); 807 static assert(!isFlushableOutputRange!(typeof(app), char)); 808 static assert(!isFlushableOutputRange!(typeof(app))); 809 810 static assert(isOutputRange!(typeof(ostream), char)); 811 static assert(isFlushableOutputRange!(typeof(ostream), char)); 812 static assert(isFlushableOutputRange!(typeof(ostream))); 813 814 static assert(isOutputRange!(Appender!string, string)); 815 static assert(!isFlushableOutputRange!(Appender!string, string)); 816 static assert(!isFlushableOutputRange!(Appender!string)); 817 818 static assert(isOutputRange!(Appender!(char[]), char)); 819 static assert(!isFlushableOutputRange!(Appender!(char[]), char)); 820 static assert(!isFlushableOutputRange!(Appender!(char[]))); 821 822 static assert(isOutputRange!(BufferedOutputRange!(Appender!(char[])), char)); 823 static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])))); 824 static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])), char)); 825 } 826 827 828 /** 829 bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by 830 reading a large buffer from the input stream rather than just a single line. 831 832 The file argument needs to be a File object open for reading, typically a filesystem 833 file or standard input. Use the Yes.keepTerminator template parameter to keep the 834 newline. This is similar to stdio.File.byLine, except specified as a template paramter 835 rather than a runtime parameter. 836 837 Reading in blocks does mean that input is not read until a full buffer is available or 838 end-of-file is reached. For this reason, bufferedByLine is not appropriate for 839 interactive input. 840 */ 841 842 auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char, 843 ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16) 844 (File file) 845 if (is(Char == char) || is(Char == ubyte)) 846 { 847 static assert(0 < growSize && growSize <= readSize); 848 849 static final class BufferedByLineImpl 850 { 851 /* Buffer state variables 852 * - _buffer.length - Full length of allocated buffer. 853 * - _dataEnd - End of currently valid data (end of last read). 854 * - _lineStart - Start of current line. 855 * - _lineEnd - End of current line. 856 */ 857 private File _file; 858 private ubyte[] _buffer; 859 private size_t _lineStart = 0; 860 private size_t _lineEnd = 0; 861 private size_t _dataEnd = 0; 862 863 this (File f) 864 { 865 _file = f; 866 _buffer = new ubyte[readSize + growSize]; 867 } 868 869 bool empty() const pure 870 { 871 return _file.eof && _lineStart == _dataEnd; 872 } 873 874 Char[] front() pure 875 { 876 assert(!empty, "Attempt to take the front of an empty bufferedByLine."); 877 878 static if (keepTerminator == Yes.keepTerminator) 879 { 880 return cast(Char[]) _buffer[_lineStart .. _lineEnd]; 881 } 882 else 883 { 884 assert(_lineStart < _lineEnd); 885 immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd; 886 return cast(Char[]) _buffer[_lineStart .. end]; 887 } 888 } 889 890 /* Note: Call popFront at initialization to do the initial read. */ 891 void popFront() 892 { 893 import std.algorithm: copy, find; 894 assert(!empty, "Attempt to popFront an empty bufferedByLine."); 895 896 /* Pop the current line. */ 897 _lineStart = _lineEnd; 898 899 /* Set up the next line if more data is available, either in the buffer or 900 * the file. The next line ends at the next newline, if there is one. 901 * 902 * Notes: 903 * - 'find' returns the slice starting with the character searched for, or 904 * an empty range if not found. 905 * - _lineEnd is set to _dataEnd both when the current buffer does not have 906 * a newline and when it ends with one. 907 */ 908 auto found = _buffer[_lineStart .. _dataEnd].find(terminator); 909 _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1; 910 911 if (found.empty && !_file.eof) 912 { 913 /* No newline in current buffer. Read from the file until the next 914 * newline is found. 915 */ 916 assert(_lineEnd == _dataEnd); 917 918 if (_lineStart > 0) 919 { 920 /* Move remaining data to the start of the buffer. */ 921 immutable remainingLength = _dataEnd - _lineStart; 922 copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]); 923 _lineStart = 0; 924 _lineEnd = _dataEnd = remainingLength; 925 } 926 927 do 928 { 929 /* Grow the buffer if necessary. */ 930 immutable availableSize = _buffer.length - _dataEnd; 931 if (availableSize < readSize) 932 { 933 size_t growBy = growSize; 934 while (availableSize + growBy < readSize) growBy += growSize; 935 _buffer.length += growBy; 936 } 937 938 /* Read the next block. */ 939 _dataEnd += 940 _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize]) 941 .length; 942 943 found = _buffer[_lineEnd .. _dataEnd].find(terminator); 944 _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1; 945 946 } while (found.empty && !_file.eof); 947 } 948 } 949 } 950 951 assert(file.isOpen, "bufferedByLine passed a closed file."); 952 953 auto r = new BufferedByLineImpl(file); 954 if (!r.empty) r.popFront; 955 return r; 956 } 957 958 // BufferedByLine. 959 unittest 960 { 961 import std.array : appender; 962 import std.conv : to; 963 import std.file : rmdirRecurse, readText; 964 import std.path : buildPath; 965 import std.range : lockstep; 966 import std.stdio; 967 import tsv_utils.common.unittest_utils; 968 969 auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline"); 970 scope(exit) testDir.rmdirRecurse; 971 972 /* Create two data files with the same data. Read both in parallel with byLine and 973 * bufferedByLine and compare each line. 974 */ 975 auto data1 = appender!(char[])(); 976 977 foreach (i; 1 .. 1001) data1.put('\n'); 978 foreach (i; 1 .. 1001) data1.put("a\n"); 979 foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); } 980 foreach (i; 1 .. 1001) 981 { 982 foreach (j; 1 .. i+1) data1.put('x'); 983 data1.put('\n'); 984 } 985 986 string file1a = buildPath(testDir, "file1a.txt"); 987 string file1b = buildPath(testDir, "file1b.txt"); 988 { 989 auto f1aFH = file1a.File("wb"); 990 f1aFH.write(data1.data); 991 f1aFH.close; 992 993 auto f1bFH = file1b.File("wb"); 994 f1bFH.write(data1.data); 995 f1bFH.close; 996 } 997 998 /* Default parameters. */ 999 { 1000 auto f1aFH = file1a.File(); 1001 auto f1bFH = file1b.File(); 1002 auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator); 1003 auto f1bIn = f1bFH.byLine(No.keepTerminator); 1004 1005 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1006 1007 f1aFH.close; 1008 f1bFH.close; 1009 } 1010 { 1011 auto f1aFH = file1a.File(); 1012 auto f1bFH = file1b.File(); 1013 auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator); 1014 auto f1bIn = f1bFH.byLine(Yes.keepTerminator); 1015 1016 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1017 1018 f1aFH.close; 1019 f1bFH.close; 1020 } 1021 1022 /* Smaller read size. This will trigger buffer growth. */ 1023 { 1024 auto f1aFH = file1a.File(); 1025 auto f1bFH = file1b.File(); 1026 auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', 512, 256); 1027 auto f1bIn = f1bFH.byLine(No.keepTerminator); 1028 1029 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1030 1031 f1aFH.close; 1032 f1bFH.close; 1033 } 1034 1035 /* Exercise boundary cases in buffer growth. 1036 * Note: static-foreach requires DMD 2.076 / LDC 1.6 1037 */ 1038 static foreach (readSize; [1, 2, 4]) 1039 { 1040 static foreach (growSize; 1 .. readSize + 1) 1041 {{ 1042 auto f1aFH = file1a.File(); 1043 auto f1bFH = file1b.File(); 1044 auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1045 auto f1bIn = f1bFH.byLine(No.keepTerminator); 1046 1047 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1048 1049 f1aFH.close; 1050 f1bFH.close; 1051 }} 1052 static foreach (growSize; 1 .. readSize + 1) 1053 {{ 1054 auto f1aFH = file1a.File(); 1055 auto f1bFH = file1b.File(); 1056 auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1057 auto f1bIn = f1bFH.byLine(Yes.keepTerminator); 1058 1059 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1060 1061 f1aFH.close; 1062 f1bFH.close; 1063 }} 1064 } 1065 1066 1067 /* Files that do not end in a newline. */ 1068 1069 string file2a = buildPath(testDir, "file2a.txt"); 1070 string file2b = buildPath(testDir, "file2b.txt"); 1071 string file3a = buildPath(testDir, "file3a.txt"); 1072 string file3b = buildPath(testDir, "file3b.txt"); 1073 string file4a = buildPath(testDir, "file4a.txt"); 1074 string file4b = buildPath(testDir, "file4b.txt"); 1075 1076 { 1077 auto f1aFH = file1a.File("wb"); 1078 f1aFH.write("a"); 1079 f1aFH.close; 1080 } 1081 { 1082 auto f1bFH = file1b.File("wb"); 1083 f1bFH.write("a"); 1084 f1bFH.close; 1085 } 1086 { 1087 auto f2aFH = file2a.File("wb"); 1088 f2aFH.write("ab"); 1089 f2aFH.close; 1090 } 1091 { 1092 auto f2bFH = file2b.File("wb"); 1093 f2bFH.write("ab"); 1094 f2bFH.close; 1095 } 1096 { 1097 auto f3aFH = file3a.File("wb"); 1098 f3aFH.write("abc"); 1099 f3aFH.close; 1100 } 1101 { 1102 auto f3bFH = file3b.File("wb"); 1103 f3bFH.write("abc"); 1104 f3bFH.close; 1105 } 1106 1107 static foreach (readSize; [1, 2, 4]) 1108 { 1109 static foreach (growSize; 1 .. readSize + 1) 1110 {{ 1111 auto f1aFH = file1a.File(); 1112 auto f1bFH = file1b.File(); 1113 auto f1aIn = f1aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1114 auto f1bIn = f1bFH.byLine(No.keepTerminator); 1115 1116 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1117 1118 f1aFH.close; 1119 f1bFH.close; 1120 1121 auto f2aFH = file2a.File(); 1122 auto f2bFH = file2b.File(); 1123 auto f2aIn = f2aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1124 auto f2bIn = f2bFH.byLine(No.keepTerminator); 1125 1126 foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1127 1128 f2aFH.close; 1129 f2bFH.close; 1130 1131 auto f3aFH = file3a.File(); 1132 auto f3bFH = file3b.File(); 1133 auto f3aIn = f3aFH.bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize); 1134 auto f3bIn = f3bFH.byLine(No.keepTerminator); 1135 1136 foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1137 1138 f3aFH.close; 1139 f3bFH.close; 1140 }} 1141 static foreach (growSize; 1 .. readSize + 1) 1142 {{ 1143 auto f1aFH = file1a.File(); 1144 auto f1bFH = file1b.File(); 1145 auto f1aIn = f1aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1146 auto f1bIn = f1bFH.byLine(Yes.keepTerminator); 1147 1148 foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1149 1150 f1aFH.close; 1151 f1bFH.close; 1152 1153 auto f2aFH = file2a.File(); 1154 auto f2bFH = file2b.File(); 1155 auto f2aIn = f2aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1156 auto f2bIn = f2bFH.byLine(Yes.keepTerminator); 1157 1158 foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1159 1160 f2aFH.close; 1161 f2bFH.close; 1162 1163 auto f3aFH = file3a.File(); 1164 auto f3bFH = file3b.File(); 1165 auto f3aIn = f3aFH.bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize); 1166 auto f3bIn = f3bFH.byLine(Yes.keepTerminator); 1167 1168 foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b); 1169 1170 f3aFH.close; 1171 f3bFH.close; 1172 }} 1173 } 1174 } 1175 1176 /** 1177 joinAppend performs a join operation on an input range, appending the results to 1178 an output range. 1179 1180 joinAppend was written as a performance enhancement over using std.algorithm.joiner 1181 or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower 1182 than std.array.join with writeln. The joiner performance may be due to interaction 1183 with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter 1184 is better, but still substantially slower than join. Using join works reasonably well, 1185 but is allocating memory unnecessarily. 1186 1187 Using joinAppend with Appender is a bit faster than join, and allocates less memory. 1188 The Appender re-uses the underlying data buffer, saving memory. The example below 1189 illustrates. It is a modification of the InputFieldReordering example. The role 1190 Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange 1191 uses a similar technique to buffer multiple lines. 1192 1193 Note: The original uses joinAppend have been replaced by BufferedOutputRange, which has 1194 its own joinAppend method. However, joinAppend remains useful when constructing internal 1195 buffers where BufferedOutputRange is not appropriate. 1196 1197 --- 1198 int main(string[] args) 1199 { 1200 import tsvutil; 1201 import std.algorithm, std.array, std.range, std.stdio; 1202 size_t[] fieldIndicies = [3, 0, 2]; 1203 auto fieldReordering = new InputFieldReordering!char(fieldIndicies); 1204 auto outputBuffer = appender!(char[]); 1205 foreach (line; stdin.byLine) 1206 { 1207 fieldReordering.initNewLine; 1208 foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate) 1209 { 1210 fieldReordering.processNextField(fieldIndex, fieldValue); 1211 if (fieldReordering.allFieldsFilled) break; 1212 } 1213 if (fieldReordering.allFieldsFilled) 1214 { 1215 outputBuffer.clear; 1216 writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t'))); 1217 } 1218 else 1219 { 1220 writeln("Error: Insufficient number of field on the line."); 1221 } 1222 } 1223 return 0; 1224 } 1225 --- 1226 */ 1227 OutputRange joinAppend(InputRange, OutputRange, E) 1228 (InputRange inputRange, ref OutputRange outputRange, E delimiter) 1229 if (isInputRange!InputRange && 1230 (is(ElementType!InputRange : const E[]) && 1231 isOutputRange!(OutputRange, E[])) 1232 || 1233 (is(ElementType!InputRange : const E) && 1234 isOutputRange!(OutputRange, E)) 1235 ) 1236 { 1237 if (!inputRange.empty) 1238 { 1239 outputRange.put(inputRange.front); 1240 inputRange.popFront; 1241 } 1242 foreach (x; inputRange) 1243 { 1244 outputRange.put(delimiter); 1245 outputRange.put(x); 1246 } 1247 return outputRange; 1248 } 1249 1250 // joinAppend. 1251 @safe unittest 1252 { 1253 import std.array : appender; 1254 import std.algorithm : equal; 1255 1256 char[] c1 = ['a', 'b', 'c']; 1257 char[] c2 = ['d', 'e', 'f']; 1258 char[] c3 = ['g', 'h', 'i']; 1259 auto cvec = [c1, c2, c3]; 1260 1261 auto s1 = "abc"; 1262 auto s2 = "def"; 1263 auto s3 = "ghi"; 1264 auto svec = [s1, s2, s3]; 1265 1266 auto charAppender = appender!(char[])(); 1267 1268 assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi"); 1269 assert(equal(cvec, [c1, c2, c3])); 1270 1271 charAppender.put('$'); 1272 assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi"); 1273 assert(equal(cvec, [s1, s2, s3])); 1274 1275 charAppender.clear; 1276 assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi"); 1277 1278 auto intAppender = appender!(int[])(); 1279 1280 auto i1 = [100, 101, 102]; 1281 auto i2 = [200, 201, 202]; 1282 auto i3 = [300, 301, 302]; 1283 auto ivec = [i1, i2, i3]; 1284 1285 assert(ivec.joinAppend(intAppender, 0).data == 1286 [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]); 1287 1288 intAppender.clear; 1289 assert(i1.joinAppend(intAppender, 0).data == 1290 [100, 0, 101, 0, 102]); 1291 assert(i2.joinAppend(intAppender, 1).data == 1292 [100, 0, 101, 0, 102, 1293 200, 1, 201, 1, 202]); 1294 assert(i3.joinAppend(intAppender, 2).data == 1295 [100, 0, 101, 0, 102, 1296 200, 1, 201, 1, 202, 1297 300, 2, 301, 2, 302]); 1298 } 1299 1300 /** 1301 getTsvFieldValue extracts the value of a single field from a delimited text string. 1302 1303 This is a convenience function intended for cases when only a single field from an 1304 input line is needed. If multiple values are needed, it will be more efficient to 1305 work directly with std.algorithm.splitter or the InputFieldReordering class. 1306 1307 The input text is split by a delimiter character. The specified field is converted 1308 to the desired type and the value returned. 1309 1310 An exception is thrown if there are not enough fields on the line or if conversion 1311 fails. Conversion is done with std.conv.to, it throws a std.conv.ConvException on 1312 failure. If not enough fields, the exception text is generated referencing 1-upped 1313 field numbers as would be provided by command line users. 1314 */ 1315 T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim) 1316 if (isSomeChar!C) 1317 { 1318 import std.algorithm : splitter; 1319 import std.conv : to; 1320 import std.format : format; 1321 import std.range; 1322 1323 auto splitLine = line.splitter(delim); 1324 size_t atField = 0; 1325 1326 while (atField < fieldIndex && !splitLine.empty) 1327 { 1328 splitLine.popFront; 1329 atField++; 1330 } 1331 1332 T val; 1333 if (splitLine.empty) 1334 { 1335 if (fieldIndex == 0) 1336 { 1337 /* This is a workaround to a splitter special case - If the input is empty, 1338 * the returned split range is empty. This doesn't properly represent a single 1339 * column file. More correct mathematically, and for this case, would be a 1340 * single value representing an empty string. The input line is a convenient 1341 * source of an empty line. Info: 1342 * Bug: https://issues.dlang.org/show_bug.cgi?id=15735 1343 * Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030 1344 */ 1345 assert(line.empty); 1346 val = line.to!T; 1347 } 1348 else 1349 { 1350 throw new Exception( 1351 format("Not enough fields on line. Number required: %d; Number found: %d", 1352 fieldIndex + 1, atField)); 1353 } 1354 } 1355 else 1356 { 1357 val = splitLine.front.to!T; 1358 } 1359 1360 return val; 1361 } 1362 1363 // getTsvFieldValue. 1364 @safe unittest 1365 { 1366 import std.conv : ConvException, to; 1367 import std.exception; 1368 1369 /* Common cases. */ 1370 assert(getTsvFieldValue!double("123", 0, '\t') == 123.0); 1371 assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5); 1372 assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123); 1373 assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99); 1374 assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99); 1375 assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99"); 1376 assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红"); 1377 assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅"); 1378 assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑"); 1379 assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5); 1380 1381 /* The empty field cases. */ 1382 assert(getTsvFieldValue!string("", 0, '\t') == ""); 1383 assert(getTsvFieldValue!string("\t", 0, '\t') == ""); 1384 assert(getTsvFieldValue!string("\t", 1, '\t') == ""); 1385 assert(getTsvFieldValue!string("", 0, ':') == ""); 1386 assert(getTsvFieldValue!string(":", 0, ':') == ""); 1387 assert(getTsvFieldValue!string(":", 1, ':') == ""); 1388 1389 /* Tests with different data types. */ 1390 string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5"; 1391 char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]); 1392 dchar[] dcharLine = stringLine.to!(dchar[]); 1393 wchar[] wcharLine = stringLine.to!(wchar[]); 1394 1395 assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black"); 1396 assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი"); 1397 assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring); 1398 assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5); 1399 1400 assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black"); 1401 assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი"); 1402 assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring); 1403 assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5); 1404 1405 assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black"); 1406 assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი"); 1407 assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring); 1408 assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5); 1409 1410 assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black"); 1411 assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი"); 1412 assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring); 1413 assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5); 1414 1415 /* Conversion errors. */ 1416 assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t')); 1417 assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|')); 1418 assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|')); 1419 assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|')); 1420 assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|')); 1421 1422 /* Not enough field errors. These should throw, but not a ConvException.*/ 1423 assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t'))); 1424 assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t'))); 1425 assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t'))); 1426 } 1427 1428 /** 1429 Yes|No.newlineWasRemoved is a template parameter to throwIfWindowsNewline. A Yes 1430 value indicates the Unix newline was already removed, as might be done via 1431 std.File.byLine or similar mechanism. 1432 */ 1433 alias NewlineWasRemoved = Flag!"newlineWasRemoved"; 1434 1435 /** 1436 throwIfWindowsLineNewline throws an exception if the 'line' argument ends with a 1437 Windows/DOS line ending. This is used by TSV Utilities tools to detect Window/DOS 1438 line endings and terminate processing with an error message to the user. 1439 1440 The 'nlWasRemoved' template parameter can be used if a Unix newline character was 1441 already removed. In this case the CR character from a Windows CRLF remains and can be 1442 detected. This is useful when reading files in binary mode, stripping Unix newlines. 1443 */ 1444 void throwIfWindowsNewline 1445 (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved) 1446 (const char[] line, const char[] filename, size_t lineNum) 1447 { 1448 static if (nlWasRemoved) 1449 { 1450 immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r'; 1451 } 1452 else 1453 { 1454 immutable bool hasWindowsLineEnding = 1455 line.length > 1 && 1456 line[$ - 2] == '\r' && 1457 line[$ - 1] == '\n'; 1458 } 1459 1460 if (hasWindowsLineEnding) 1461 { 1462 import std.format; 1463 throw new Exception( 1464 format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s", 1465 (filename == "-") ? "Standard Input" : filename, lineNum)); 1466 } 1467 } 1468 1469 // throwIfWindowsNewline 1470 @safe unittest 1471 { 1472 import std.exception; 1473 1474 assertNotThrown(throwIfWindowsNewline("", "afile.tsv", 1)); 1475 assertNotThrown(throwIfWindowsNewline("a", "afile.tsv", 2)); 1476 assertNotThrown(throwIfWindowsNewline("ab", "afile.tsv", 3)); 1477 assertNotThrown(throwIfWindowsNewline("abc", "afile.tsv", 4)); 1478 1479 assertThrown(throwIfWindowsNewline("\r", "afile.tsv", 1)); 1480 assertThrown(throwIfWindowsNewline("a\r", "afile.tsv", 2)); 1481 assertThrown(throwIfWindowsNewline("ab\r", "afile.tsv", 3)); 1482 assertThrown(throwIfWindowsNewline("abc\r", "afile.tsv", 4)); 1483 1484 assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\n", "afile.tsv", 1)); 1485 assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\n", "afile.tsv", 2)); 1486 assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3)); 1487 assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4)); 1488 1489 assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5)); 1490 assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6)); 1491 assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7)); 1492 assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8)); 1493 1494 /* Standard Input formatting. */ 1495 import std.algorithm : endsWith; 1496 bool exceptionCaught = false; 1497 1498 try (throwIfWindowsNewline("\r", "-", 99)); 1499 catch (Exception e) 1500 { 1501 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1502 exceptionCaught = true; 1503 } 1504 finally 1505 { 1506 assert(exceptionCaught); 1507 exceptionCaught = false; 1508 } 1509 1510 try (throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "-", 99)); 1511 catch (Exception e) 1512 { 1513 assert(e.msg.endsWith("File: Standard Input, Line: 99")); 1514 exceptionCaught = true; 1515 } 1516 finally 1517 { 1518 assert(exceptionCaught); 1519 exceptionCaught = false; 1520 } 1521 } 1522 1523 /** Flag used by InputSourceRange to determine if the header line should be when 1524 opening a file. 1525 */ 1526 alias ReadHeader = Flag!"readHeader"; 1527 1528 /** 1529 inputSourceRange is a helper function for creating new InputSourceRange objects. 1530 */ 1531 InputSourceRange inputSourceRange(string[] filepaths, ReadHeader readHeader) 1532 { 1533 return new InputSourceRange(filepaths, readHeader); 1534 } 1535 1536 /** 1537 InputSourceRange is an input range that iterates over a set of input files. 1538 1539 InputSourceRange is used to iterate over a set of files passed on the command line. 1540 Files are automatically opened and closed during iteration. The caller can choose to 1541 have header lines read automatically. 1542 1543 The range is created from a set of filepaths. These filepaths are mapped to 1544 InputSource objects during the iteration. This is what enables automatically opening 1545 and closing files and reading the header line. 1546 1547 The motivation for an InputSourceRange is to provide a standard way to look at the 1548 header line of the first input file during command line argument processing, and then 1549 pass the open input file and the header line along to the main processing functions. 1550 This enables a features like named fields to be implemented in a standard way. 1551 1552 Both InputSourceRange and InputSource are reference objects. This keeps their use 1553 limited to a single iteration over the set of files. The files can be iterated again 1554 by creating a new InputSourceRange against the same filepaths. 1555 1556 Currently, InputSourceRange supports files and standard input. It is possible other 1557 types of input sources will be added in the future. 1558 */ 1559 final class InputSourceRange 1560 { 1561 private string[] _filepaths; 1562 private ReadHeader _readHeader; 1563 private InputSource _front; 1564 1565 this(string[] filepaths, ReadHeader readHeader) 1566 { 1567 _filepaths = filepaths.dup; 1568 _readHeader = readHeader; 1569 _front = null; 1570 1571 if (!_filepaths.empty) 1572 { 1573 _front = new InputSource(_filepaths.front, _readHeader); 1574 _front.open; 1575 _filepaths.popFront; 1576 } 1577 } 1578 1579 size_t length() const pure nothrow @safe 1580 { 1581 return empty ? 0 : _filepaths.length + 1; 1582 } 1583 1584 bool empty() const pure nothrow @safe 1585 { 1586 return _front is null; 1587 } 1588 1589 InputSource front() pure @safe 1590 { 1591 assert(!empty, "Attempt to take the front of an empty InputSourceRange"); 1592 return _front; 1593 } 1594 1595 void popFront() 1596 { 1597 assert(!empty, "Attempt to popFront an empty InputSourceRange"); 1598 1599 _front.close; 1600 1601 if (!_filepaths.empty) 1602 { 1603 _front = new InputSource(_filepaths.front, _readHeader); 1604 _front.open; 1605 _filepaths.popFront; 1606 } 1607 else 1608 { 1609 _front = null; 1610 } 1611 } 1612 } 1613 1614 /** 1615 InputSource is a class of objects produced by iterating over an InputSourceRange. 1616 1617 An InputSource object provides access to the open file currently the front element 1618 of an InputSourceRange. The main methods application code is likely to need are: 1619 1620 $(LIST 1621 * `file()` - Returns the File object. The file will be open for reading as long 1622 InputSource instance is the front element of the InputSourceRange it came from. 1623 1624 * `header(KeepTerminator keepTerminator = No.keepTerminator)` - Returns the 1625 header line from the file. An empty string is returned if InputSource range 1626 was created with readHeader=false. 1627 1628 * `name()` - The name of the input source. The name returned is intended for 1629 user error messages. For files, this is the filepath that was passed to 1630 InputSourceRange. For standard input, it is "Standard Input". 1631 ) 1632 1633 An InputSource is a reference object, so the copies will retain the state of the 1634 InputSourceRange front element. In particular, all copies will have the open 1635 state of the front element of the InputSourceRange. 1636 1637 This class is not intended for use outside the context of an InputSourceRange. 1638 */ 1639 final class InputSource 1640 { 1641 import std.stdio; 1642 1643 private immutable string _filepath; 1644 private immutable bool _isStdin; 1645 private bool _isOpen; 1646 private ReadHeader _readHeader; 1647 private bool _hasBeenOpened; 1648 private string _header; 1649 private File _file; 1650 1651 private this(string filepath, ReadHeader readHeader) pure nothrow @safe 1652 { 1653 _filepath = filepath; 1654 _isStdin = filepath == "-"; 1655 _isOpen = false; 1656 _readHeader = readHeader; 1657 _hasBeenOpened = false; 1658 } 1659 1660 /** file returns the File object held by the InputSource. 1661 * 1662 * The File will be open for reading as long as the InputSource instance is the 1663 * front element of the InputSourceRange it came from. 1664 */ 1665 File file() nothrow @safe 1666 { 1667 return _file; 1668 } 1669 1670 /** isReadHeaderEnabled returns true if the header line is being read. 1671 */ 1672 bool isReadHeaderEnabled() const pure nothrow @safe 1673 { 1674 return _readHeader == Yes.readHeader; 1675 } 1676 1677 /** header returns the header line from the input file. 1678 * 1679 * An empty string is returned if InputSource range was created with 1680 * readHeader=false. 1681 */ 1682 string header(KeepTerminator keepTerminator = No.keepTerminator) const pure nothrow @safe 1683 { 1684 assert(_hasBeenOpened); 1685 return (keepTerminator == Yes.keepTerminator || 1686 _header.length == 0 || 1687 _header[$ - 1] != '\n') ? 1688 _header : _header[0 .. $-1]; 1689 } 1690 1691 /** isHeaderEmpty returns true if there is no data for a header, including the 1692 * terminator. 1693 * 1694 * When headers are being read, this true only if the file is empty. 1695 */ 1696 bool isHeaderEmpty() const pure nothrow @safe 1697 { 1698 assert(_hasBeenOpened); 1699 return _header.empty; 1700 } 1701 1702 /** name returns a user friendly name representing the input source. 1703 * 1704 * For files, it is the filepath provided to InputSourceRange. For standard 1705 * input, it is "Standard Input". (Use isStdin() to test for standard input, 1706 * not name(). 1707 */ 1708 string name() const pure nothrow @safe 1709 { 1710 return _isStdin ? "Standard Input" : _filepath; 1711 } 1712 1713 /** isStdin returns true if the input source is Standard Input, false otherwise. 1714 */ 1715 bool isStdin() const pure nothrow @safe 1716 { 1717 return _isStdin; 1718 } 1719 1720 /** isOpen returns true if the input source is open for reading, false otherwise. 1721 * 1722 * "Open" in this context is whether the InputSource object is currently open, 1723 * meaning that it is the front element of the InputSourceRange that created it. 1724 * 1725 * For files, this is also reflected in the state of the underlying File object. 1726 * However, standard input is never actually closed. 1727 */ 1728 bool isOpen() const pure nothrow @safe 1729 { 1730 return _isOpen; 1731 } 1732 1733 private void open() 1734 { 1735 assert(!_isOpen); 1736 assert(!_hasBeenOpened); 1737 1738 _file = isStdin ? stdin : _filepath.File("rb"); 1739 if (_readHeader) _header = _file.readln; 1740 _isOpen = true; 1741 _hasBeenOpened = true; 1742 } 1743 1744 private void close() 1745 { 1746 if (!_isStdin) _file.close; 1747 _isOpen = false; 1748 } 1749 } 1750 1751 // InputSourceRange and InputSource 1752 unittest 1753 { 1754 import std.algorithm : all, each; 1755 import std.array : appender; 1756 import std.exception : assertThrown; 1757 import std.file : rmdirRecurse; 1758 import std.path : buildPath; 1759 import std.range; 1760 import std.stdio; 1761 import tsv_utils.common.unittest_utils; 1762 1763 auto testDir = makeUnittestTempDir("tsv_utils_input_source_range"); 1764 scope(exit) testDir.rmdirRecurse; 1765 1766 string file0 = buildPath(testDir, "file0.txt"); 1767 string file1 = buildPath(testDir, "file1.txt"); 1768 string file2 = buildPath(testDir, "file2.txt"); 1769 string file3 = buildPath(testDir, "file3.txt"); 1770 1771 string file0Header = ""; 1772 string file1Header = "file 1 header\n"; 1773 string file2Header = "file 2 header\n"; 1774 string file3Header = "file 3 header\n"; 1775 1776 string file0Body = ""; 1777 string file1Body = ""; 1778 string file2Body = "file 2 line 1\n"; 1779 string file3Body = "file 3 line 1\nfile 3 line 2\n"; 1780 1781 string file0Data = file0Header ~ file0Body; 1782 string file1Data = file1Header ~ file1Body; 1783 string file2Data = file2Header ~ file2Body; 1784 string file3Data = file3Header ~ file3Body; 1785 1786 { 1787 file0.File("wb").write(file0Data); 1788 file1.File("wb").write(file1Data); 1789 file2.File("wb").write(file2Data); 1790 file3.File("wb").write(file3Data); 1791 } 1792 1793 auto inputFiles = [file0, file1, file2, file3]; 1794 auto fileHeaders = [file0Header, file1Header, file2Header, file3Header]; 1795 auto fileBodies = [file0Body, file1Body, file2Body, file3Body]; 1796 auto fileData = [file0Data, file1Data, file2Data, file3Data]; 1797 1798 auto readSources = appender!(InputSource[]); 1799 auto buffer = new char[1024]; // Must be large enough to hold the test files. 1800 1801 /* Tests without standard input. Don't want to count on state of standard 1802 * input or modifying it when doing unit tests, so avoid reading from it. 1803 */ 1804 1805 foreach(numFiles; 1 .. inputFiles.length + 1) 1806 { 1807 /* Reading headers. */ 1808 1809 readSources.clear; 1810 auto inputSourcesYesHeader = inputSourceRange(inputFiles[0 .. numFiles], Yes.readHeader); 1811 assert(inputSourcesYesHeader.length == numFiles); 1812 1813 foreach(fileNum, source; inputSourcesYesHeader.enumerate) 1814 { 1815 readSources.put(source); 1816 assert(source.isOpen); 1817 assert(source.file.isOpen); 1818 assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen)); 1819 assert(readSources.data[fileNum].isOpen); 1820 1821 assert(source.header(Yes.keepTerminator) == fileHeaders[fileNum]); 1822 1823 auto headerNoTerminatorLength = fileHeaders[fileNum].length; 1824 if (headerNoTerminatorLength > 0) --headerNoTerminatorLength; 1825 assert(source.header(No.keepTerminator) == 1826 fileHeaders[fileNum][0 .. headerNoTerminatorLength]); 1827 1828 assert(source.name == inputFiles[fileNum]); 1829 assert(!source.isStdin); 1830 assert(source.isReadHeaderEnabled); 1831 1832 assert(source.file.rawRead(buffer) == fileBodies[fileNum]); 1833 } 1834 1835 /* The InputSourceRange is a reference range, consumed by the foreach. */ 1836 assert(inputSourcesYesHeader.empty); 1837 1838 /* Without reading headers. */ 1839 1840 readSources.clear; 1841 auto inputSourcesNoHeader = inputSourceRange(inputFiles[0 .. numFiles], No.readHeader); 1842 assert(inputSourcesNoHeader.length == numFiles); 1843 1844 foreach(fileNum, source; inputSourcesNoHeader.enumerate) 1845 { 1846 readSources.put(source); 1847 assert(source.isOpen); 1848 assert(source.file.isOpen); 1849 assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen)); 1850 assert(readSources.data[fileNum].isOpen); 1851 1852 assert(source.header(Yes.keepTerminator).empty); 1853 assert(source.header(No.keepTerminator).empty); 1854 1855 assert(source.name == inputFiles[fileNum]); 1856 assert(!source.isStdin); 1857 assert(!source.isReadHeaderEnabled); 1858 1859 assert(source.file.rawRead(buffer) == fileData[fileNum]); 1860 } 1861 1862 /* The InputSourceRange is a reference range, consumed by the foreach. */ 1863 assert(inputSourcesNoHeader.empty); 1864 } 1865 1866 /* Tests with standard input. No actual reading in these tests. 1867 */ 1868 1869 readSources.clear; 1870 foreach(fileNum, source; inputSourceRange(["-", "-"], No.readHeader).enumerate) 1871 { 1872 readSources.put(source); 1873 assert(source.isOpen); 1874 assert(source.file.isOpen); 1875 assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen)); // InputSource objects are "closed". 1876 assert(readSources.data[0 .. fileNum].all!(s => s.file.isOpen)); // Actual stdin should not be closed. 1877 assert(readSources.data[fileNum].isOpen); 1878 1879 assert(source.header(Yes.keepTerminator).empty); 1880 assert(source.header(No.keepTerminator).empty); 1881 1882 assert(source.name == "Standard Input"); 1883 assert(source.isStdin); 1884 } 1885 1886 /* Empty filelist. */ 1887 string[] nofiles; 1888 { 1889 auto sources = inputSourceRange(nofiles, No.readHeader); 1890 assert(sources.empty); 1891 } 1892 { 1893 auto sources = inputSourceRange(nofiles, Yes.readHeader); 1894 assert(sources.empty); 1895 } 1896 1897 /* Error cases. */ 1898 assertThrown(inputSourceRange([file0, "no_such_file.txt"], No.readHeader).each); 1899 assertThrown(inputSourceRange(["no_such_file.txt", file1], Yes.readHeader).each); 1900 } 1901 1902 /** 1903 byLineSourceRange is a helper function for creating new byLineSourceRange objects. 1904 */ 1905 auto byLineSourceRange( 1906 KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n') 1907 (string[] filepaths) 1908 if (is(Char == char) || is(Char == ubyte)) 1909 { 1910 return new ByLineSourceRange!(keepTerminator, Char, terminator)(filepaths); 1911 } 1912 1913 /** 1914 ByLineSourceRange is an input range that iterates over a set of input files. It 1915 provides bufferedByLine access to each file. 1916 1917 A ByLineSourceRange is used to iterate over a set of files passed on the command line. 1918 Files are automatically opened and closed during iteration. The front element of the 1919 range provides access to a bufferedByLine for iterating over the lines in the file. 1920 1921 The range is created from a set of filepaths. These filepaths are mapped to 1922 ByLineSource objects during the iteration. This is what enables automatically opening 1923 and closing files and providing bufferedByLine access. 1924 1925 The motivation behind ByLineSourceRange is to provide a standard way to look at the 1926 header line of the first input file during command line argument processing, and then 1927 pass the open input file along to the main processing functions. This enables 1928 features like named fields to be implemented in a standard way. 1929 1930 Access to the first line of the first file is available after creating the 1931 ByLineSourceRange instance. The first file is opened and a bufferedByLine created. 1932 The first line of the first file is via byLine.front (after checking !byLine.empty). 1933 1934 Both ByLineSourceRange and ByLineSource are reference objects. This keeps their use 1935 limited to a single iteration over the set of files. The files can be iterated again 1936 by creating a new InputSourceRange against the same filepaths. 1937 1938 Currently, ByLineSourceRange supports files and standard input. It is possible other 1939 types of input sources will be added in the future. 1940 */ 1941 final class ByLineSourceRange( 1942 KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n') 1943 if (is(Char == char) || is(Char == ubyte)) 1944 { 1945 alias ByLineSourceType = ByLineSource!(keepTerminator, char, terminator); 1946 1947 private string[] _filepaths; 1948 private ByLineSourceType _front; 1949 1950 this(string[] filepaths) 1951 { 1952 _filepaths = filepaths.dup; 1953 _front = null; 1954 1955 if (!_filepaths.empty) 1956 { 1957 _front = new ByLineSourceType(_filepaths.front); 1958 _front.open; 1959 _filepaths.popFront; 1960 } 1961 } 1962 1963 size_t length() const pure nothrow @safe 1964 { 1965 return empty ? 0 : _filepaths.length + 1; 1966 } 1967 1968 bool empty() const pure nothrow @safe 1969 { 1970 return _front is null; 1971 } 1972 1973 ByLineSourceType front() pure @safe 1974 { 1975 assert(!empty, "Attempt to take the front of an empty ByLineSourceRange"); 1976 return _front; 1977 } 1978 1979 void popFront() 1980 { 1981 assert(!empty, "Attempt to popFront an empty ByLineSourceRange"); 1982 1983 _front.close; 1984 1985 if (!_filepaths.empty) 1986 { 1987 _front = new ByLineSourceType(_filepaths.front); 1988 _front.open; 1989 _filepaths.popFront; 1990 } 1991 else 1992 { 1993 _front = null; 1994 } 1995 } 1996 } 1997 1998 /** 1999 ByLineSource is a class of objects produced by iterating over an ByLineSourceRange. 2000 2001 A ByLineSource instance provides a bufferedByLine range for the current the front 2002 element of a ByLineSourceRange. The main methods application code is likely to 2003 need are: 2004 2005 $(LIST 2006 * `byLine()` - Returns the bufferedByLine range accessing the open file. The file 2007 will be open for reading (using the bufferedByLine range) as long as the 2008 ByLineSource instance is the front element of the ByLineSourceRange 2009 it came from. 2010 2011 * `name()` - The name of the input source. The name returned is intended for 2012 user error messages. For files, this is the filepath that was passed to 2013 ByLineSourceRange. For standard input, it is "Standard Input". 2014 ) 2015 2016 A ByLineSource is a reference object, so the copies have the same state as the 2017 ByLineSourceRange front element. In particular, all copies will have the open 2018 state of the front element of the ByLineSourceRange. 2019 2020 This class is not intended for use outside the context of an ByLineSourceRange. 2021 */ 2022 final class ByLineSource( 2023 KeepTerminator keepTerminator, Char = char, ubyte terminator = '\n') 2024 if (is(Char == char) || is(Char == ubyte)) 2025 { 2026 import std.stdio; 2027 import std.traits : ReturnType; 2028 2029 alias newByLineFn = bufferedByLine!(keepTerminator, char, terminator); 2030 alias ByLineType = ReturnType!newByLineFn; 2031 2032 private immutable string _filepath; 2033 private immutable bool _isStdin; 2034 private bool _isOpen; 2035 private bool _hasBeenOpened; 2036 private File _file; 2037 private ByLineType _byLineRange; 2038 2039 private this(string filepath) pure nothrow @safe 2040 { 2041 _filepath = filepath; 2042 _isStdin = filepath == "-"; 2043 _isOpen = false; 2044 _hasBeenOpened = false; 2045 } 2046 2047 /** byLine returns the bufferedByLine object held by the ByLineSource instance. 2048 * 2049 * The File underlying the BufferedByLine object is open for reading as long as 2050 * the ByLineSource instance is the front element of the ByLineSourceRange it 2051 * came from. 2052 */ 2053 ByLineType byLine() nothrow @safe 2054 { 2055 return _byLineRange; 2056 } 2057 2058 /** name returns a user friendly name representing the underlying input source. 2059 * 2060 * For files, it is the filepath provided to ByLineSourceRange. For standard 2061 * input, it is "Standard Input". (Use isStdin() to test for standard input, 2062 * compare against name().) 2063 */ 2064 string name() const pure nothrow @safe 2065 { 2066 return _isStdin ? "Standard Input" : _filepath; 2067 } 2068 2069 /** isStdin returns true if the underlying input source is Standard Input, false 2070 * otherwise. 2071 */ 2072 bool isStdin() const pure nothrow @safe 2073 { 2074 return _isStdin; 2075 } 2076 2077 /** isOpen returns true if the ByLineSource instance is open for reading, false 2078 * otherwise. 2079 * 2080 * "Open" in this context is whether the ByLineSource object is currently "open". 2081 * The underlying input source backing it does not necessarily have the same 2082 * state. The ByLineSource instance is "open" if is the front element of the 2083 * ByLineSourceRange that created it. 2084 * 2085 * The underlying input source object follows the same open/close state as makes 2086 * sense. In particular, real files are closed when the ByLineSource object is 2087 * closed. The exception is standard input, which is never actually closed. 2088 */ 2089 bool isOpen() const pure nothrow @safe 2090 { 2091 return _isOpen; 2092 } 2093 2094 private void open() 2095 { 2096 assert(!_isOpen); 2097 assert(!_hasBeenOpened); 2098 2099 _file = isStdin ? stdin : _filepath.File("rb"); 2100 _byLineRange = newByLineFn(_file); 2101 _isOpen = true; 2102 _hasBeenOpened = true; 2103 } 2104 2105 private void close() 2106 { 2107 if (!_isStdin) _file.close; 2108 _isOpen = false; 2109 } 2110 } 2111 2112 // ByLineSourceRange and ByLineSource 2113 unittest 2114 { 2115 import std.algorithm : all, each; 2116 import std.array : appender; 2117 import std.exception : assertThrown; 2118 import std.file : rmdirRecurse; 2119 import std.path : buildPath; 2120 import std.range; 2121 import std.stdio; 2122 import tsv_utils.common.unittest_utils; 2123 2124 auto testDir = makeUnittestTempDir("tsv_utils_byline_input_source_range"); 2125 scope(exit) testDir.rmdirRecurse; 2126 2127 string file0 = buildPath(testDir, "file0.txt"); 2128 string file1 = buildPath(testDir, "file1.txt"); 2129 string file2 = buildPath(testDir, "file2.txt"); 2130 string file3 = buildPath(testDir, "file3.txt"); 2131 2132 string file0Header = ""; 2133 string file1Header = "file 1 header\n"; 2134 string file2Header = "file 2 header\n"; 2135 string file3Header = "file 3 header\n"; 2136 2137 string file0Body = ""; 2138 string file1Body = ""; 2139 string file2Body = "file 2 line 1\n"; 2140 string file3Body = "file 3 line 1\nfile 3 line 2\n"; 2141 2142 string file0Data = file0Header ~ file0Body; 2143 string file1Data = file1Header ~ file1Body; 2144 string file2Data = file2Header ~ file2Body; 2145 string file3Data = file3Header ~ file3Body; 2146 2147 { 2148 file0.File("wb").write(file0Data); 2149 file1.File("wb").write(file1Data); 2150 file2.File("wb").write(file2Data); 2151 file3.File("wb").write(file3Data); 2152 } 2153 2154 auto inputFiles = [file0, file1, file2, file3]; 2155 auto fileHeaders = [file0Header, file1Header, file2Header, file3Header]; 2156 auto fileBodies = [file0Body, file1Body, file2Body, file3Body]; 2157 auto fileData = [file0Data, file1Data, file2Data, file3Data]; 2158 2159 auto buffer = new char[1024]; // Must be large enough to hold the test files. 2160 2161 /* Tests without standard input. Don't want to count on state of standard 2162 * input or modifying it when doing unit tests, so avoid reading from it. 2163 */ 2164 2165 auto readSourcesNoTerminator = appender!(ByLineSource!(No.keepTerminator)[]); 2166 auto readSourcesYesTerminator = appender!(ByLineSource!(Yes.keepTerminator)[]); 2167 2168 foreach(numFiles; 1 .. inputFiles.length + 1) 2169 { 2170 /* Using No.keepTerminator. */ 2171 readSourcesNoTerminator.clear; 2172 auto inputSourcesNoTerminator = byLineSourceRange!(No.keepTerminator)(inputFiles[0 .. numFiles]); 2173 assert(inputSourcesNoTerminator.length == numFiles); 2174 2175 foreach(fileNum, source; inputSourcesNoTerminator.enumerate) 2176 { 2177 readSourcesNoTerminator.put(source); 2178 assert(source.isOpen); 2179 assert(source._file.isOpen); 2180 assert(readSourcesNoTerminator.data[0 .. fileNum].all!(s => !s.isOpen)); 2181 assert(readSourcesNoTerminator.data[fileNum].isOpen); 2182 2183 auto headerNoTerminatorLength = fileHeaders[fileNum].length; 2184 if (headerNoTerminatorLength > 0) --headerNoTerminatorLength; 2185 2186 assert(source.byLine.empty || 2187 source.byLine.front == fileHeaders[fileNum][0 .. headerNoTerminatorLength]); 2188 2189 assert(source.name == inputFiles[fileNum]); 2190 assert(!source.isStdin); 2191 2192 auto readFileData = appender!(char[]); 2193 foreach(line; source.byLine) 2194 { 2195 readFileData.put(line); 2196 readFileData.put('\n'); 2197 } 2198 2199 assert(readFileData.data == fileData[fileNum]); 2200 } 2201 2202 /* The ByLineSourceRange is a reference range, consumed by the foreach. */ 2203 assert(inputSourcesNoTerminator.empty); 2204 2205 /* Using Yes.keepTerminator. */ 2206 readSourcesYesTerminator.clear; 2207 auto inputSourcesYesTerminator = byLineSourceRange!(Yes.keepTerminator)(inputFiles[0 .. numFiles]); 2208 assert(inputSourcesYesTerminator.length == numFiles); 2209 2210 foreach(fileNum, source; inputSourcesYesTerminator.enumerate) 2211 { 2212 readSourcesYesTerminator.put(source); 2213 assert(source.isOpen); 2214 assert(source._file.isOpen); 2215 assert(readSourcesYesTerminator.data[0 .. fileNum].all!(s => !s.isOpen)); 2216 assert(readSourcesYesTerminator.data[fileNum].isOpen); 2217 2218 assert(source.byLine.empty || source.byLine.front == fileHeaders[fileNum]); 2219 2220 assert(source.name == inputFiles[fileNum]); 2221 assert(!source.isStdin); 2222 2223 auto readFileData = appender!(char[]); 2224 foreach(line; source.byLine) 2225 { 2226 readFileData.put(line); 2227 } 2228 2229 assert(readFileData.data == fileData[fileNum]); 2230 } 2231 2232 /* The ByLineSourceRange is a reference range, consumed by the foreach. */ 2233 assert(inputSourcesYesTerminator.empty); 2234 } 2235 2236 /* Empty filelist. */ 2237 string[] nofiles; 2238 { 2239 auto sources = byLineSourceRange!(No.keepTerminator)(nofiles); 2240 assert(sources.empty); 2241 } 2242 { 2243 auto sources = byLineSourceRange!(Yes.keepTerminator)(nofiles); 2244 assert(sources.empty); 2245 } 2246 2247 /* Error cases. */ 2248 assertThrown(byLineSourceRange!(No.keepTerminator)([file0, "no_such_file.txt"]).each); 2249 assertThrown(byLineSourceRange!(Yes.keepTerminator)(["no_such_file.txt", file1]).each); 2250 } 2251 2252 /** Defines the 'bufferable' input sources supported by inputSourceByChunk. 2253 * 2254 * This includes std.stdio.File objects and mutable dynamic ubyte arrays. Or, input 2255 * ranges with ubyte elements. 2256 * 2257 * Static, const, and immutable arrays can be sliced to turn them into input ranges. 2258 * 2259 * Note: The element types could easily be generalized much further if that were useful. 2260 * At present, the primary purpose of inputSourceByChunk is to have a range representing 2261 * a buffered file that can also take ubyte arrays as sources for unit testing. 2262 */ 2263 enum bool isBufferableInputSource(R) = 2264 isFileHandle!(Unqual!R) || 2265 (isInputRange!R && is(Unqual!(ElementEncodingType!R) == ubyte) 2266 ); 2267 2268 @safe unittest 2269 { 2270 import std.stdio : stdin; 2271 2272 static assert(isBufferableInputSource!(File)); 2273 static assert(isBufferableInputSource!(typeof(stdin))); 2274 static assert(isBufferableInputSource!(ubyte[])); 2275 static assert(!isBufferableInputSource!(char[])); 2276 static assert(!isBufferableInputSource!(string)); 2277 2278 ubyte[10] staticArray; 2279 const ubyte[1] staticConstArray; 2280 immutable ubyte[1] staticImmutableArray; 2281 const(ubyte)[1] staticArrayConstElts; 2282 immutable(ubyte)[1] staticArrayImmutableElts; 2283 2284 ubyte[] dynamicArray = new ubyte[](10); 2285 const(ubyte)[] dynamicArrayConstElts = new ubyte[](10); 2286 immutable(ubyte)[] dynamicArrayImmutableElts = new ubyte[](10); 2287 const ubyte[] dynamicConstArray = new ubyte[](10); 2288 immutable ubyte[] dynamicImmutableArray = new ubyte[](10); 2289 2290 /* Dynamic mutable arrays are bufferable. */ 2291 static assert(!isBufferableInputSource!(typeof(staticArray))); 2292 static assert(!isBufferableInputSource!(typeof(staticArrayConstElts))); 2293 static assert(!isBufferableInputSource!(typeof(staticArrayImmutableElts))); 2294 static assert(!isBufferableInputSource!(typeof(staticConstArray))); 2295 static assert(!isBufferableInputSource!(typeof(staticImmutableArray))); 2296 2297 static assert(isBufferableInputSource!(typeof(dynamicArray))); 2298 static assert(isBufferableInputSource!(typeof(dynamicArrayConstElts))); 2299 static assert(isBufferableInputSource!(typeof(dynamicArrayImmutableElts))); 2300 static assert(!isBufferableInputSource!(typeof(dynamicConstArray))); 2301 static assert(!isBufferableInputSource!(typeof(dynamicImmutableArray))); 2302 2303 /* Slicing turns all forms into bufferable arrays. */ 2304 static assert(isBufferableInputSource!(typeof(staticArray[]))); 2305 static assert(isBufferableInputSource!(typeof(staticArrayConstElts[]))); 2306 static assert(isBufferableInputSource!(typeof(staticArrayImmutableElts[]))); 2307 static assert(isBufferableInputSource!(typeof(staticConstArray[]))); 2308 static assert(isBufferableInputSource!(typeof(staticImmutableArray[]))); 2309 2310 static assert(isBufferableInputSource!(typeof(dynamicConstArray[]))); 2311 static assert(isBufferableInputSource!(typeof(dynamicImmutableArray[]))); 2312 static assert(isBufferableInputSource!(typeof(dynamicArray[]))); 2313 static assert(isBufferableInputSource!(typeof(dynamicArrayConstElts[]))); 2314 static assert(isBufferableInputSource!(typeof(dynamicArrayImmutableElts[]))); 2315 2316 /* Element type tests. */ 2317 static assert(is(Unqual!(ElementType!(typeof(staticArray))) == ubyte)); 2318 static assert(is(Unqual!(ElementType!(typeof(staticArrayConstElts))) == ubyte)); 2319 static assert(is(Unqual!(ElementType!(typeof(staticArrayImmutableElts))) == ubyte)); 2320 static assert(is(Unqual!(ElementType!(typeof(staticConstArray))) == ubyte)); 2321 static assert(is(Unqual!(ElementType!(typeof(staticImmutableArray))) == ubyte)); 2322 static assert(is(Unqual!(ElementType!(typeof(dynamicArray))) == ubyte)); 2323 static assert(is(Unqual!(ElementType!(typeof(dynamicArrayConstElts))) == ubyte)); 2324 static assert(is(Unqual!(ElementType!(typeof(dynamicArrayImmutableElts))) == ubyte)); 2325 static assert(is(Unqual!(ElementType!(typeof(dynamicConstArray))) == ubyte)); 2326 static assert(is(Unqual!(ElementType!(typeof(dynamicImmutableArray))) == ubyte)); 2327 2328 struct S1 2329 { 2330 void popFront(); 2331 @property bool empty(); 2332 @property ubyte front(); 2333 } 2334 2335 struct S2 2336 { 2337 @property ubyte front(); 2338 void popFront(); 2339 @property bool empty(); 2340 @property auto save() { return this; } 2341 @property size_t length(); 2342 S2 opSlice(size_t, size_t); 2343 } 2344 2345 static assert(isInputRange!S1); 2346 static assert(isBufferableInputSource!S1); 2347 2348 static assert(isInputRange!S2); 2349 static assert(is(ElementEncodingType!S2 == ubyte)); 2350 static assert(hasSlicing!S2); 2351 static assert(isBufferableInputSource!S2); 2352 2353 /* For code coverage. */ 2354 S2 s2; 2355 auto x = s2.save; 2356 2357 auto repeatInt = 7.repeat!int(5); 2358 auto repeatUbyte = 7.repeat!ubyte(5); 2359 auto infiniteUbyte = 7.repeat!ubyte; 2360 2361 static assert(!isBufferableInputSource!(typeof(repeatInt))); 2362 static assert(isBufferableInputSource!(typeof(repeatUbyte))); 2363 static assert(isBufferableInputSource!(typeof(infiniteUbyte))); 2364 } 2365 2366 /** inputSourceByChunk returns a range that reads either a file handle (File) or a 2367 * ubyte[] array a chunk at a time. 2368 * 2369 * This is a cover for File.byChunk that allows passing an in-memory array or input 2370 * range as well. At present the motivation is primarily to enable unit testing of 2371 * chunk-based algorithms using in-memory strings. 2372 * 2373 * inputSourceByChunk takes either a File open for reading or an input range with 2374 * ubyte elements. Data is read a buffer at a time. The buffer can be user provided, 2375 * or allocated by inputSourceByChunk based on a caller provided buffer size. 2376 * 2377 * The primary motivation for supporting both files and input ranges as sources is to 2378 * enable unit testing of buffer based algorithms using in-memory arrays. Dynamic, 2379 * mutable arras are fine. Use slicing to turn a static, const, or immutable arrays 2380 * into an input range. 2381 * 2382 * The chunks are returned as an input range. 2383 */ 2384 auto inputSourceByChunk(InputSource)(InputSource source, size_t size) 2385 { 2386 return inputSourceByChunk(source, new ubyte[](size)); 2387 } 2388 2389 /// Ditto 2390 auto inputSourceByChunk(InputSource)(InputSource source, ubyte[] buffer) 2391 if (isBufferableInputSource!InputSource) 2392 { 2393 static if (isFileHandle!(Unqual!InputSource)) 2394 { 2395 return source.byChunk(buffer); 2396 } 2397 else 2398 { 2399 static struct BufferedChunk 2400 { 2401 private Chunks!InputSource _chunks; 2402 private ubyte[] _buffer; 2403 2404 private void readNextChunk() 2405 { 2406 if (_chunks.empty) 2407 { 2408 _buffer.length = 0; 2409 } 2410 else 2411 { 2412 import std.algorithm : copy; 2413 auto remainingBuffer = _chunks.front.take(_buffer.length).copy(_buffer); 2414 _chunks.popFront; 2415 2416 /* Only the last chunk should be shorter than the buffer. */ 2417 assert(remainingBuffer.length == 0 || _chunks.empty); 2418 2419 _buffer.length -= remainingBuffer.length; 2420 } 2421 } 2422 2423 this(InputSource source, ubyte[] buffer) 2424 { 2425 import std.exception : enforce; 2426 enforce(buffer.length > 0, "buffer size must be larger than 0"); 2427 _chunks = source.chunks(buffer.length); 2428 _buffer = buffer; 2429 readNextChunk(); 2430 } 2431 2432 @property bool empty() 2433 { 2434 return (_buffer.length == 0); 2435 } 2436 2437 @property ubyte[] front() 2438 { 2439 assert(!empty, "Attempting to fetch the front of an empty inputSourceByChunks"); 2440 return _buffer; 2441 } 2442 2443 void popFront() 2444 { 2445 assert(!empty, "Attempting to popFront an empty inputSourceByChunks"); 2446 readNextChunk(); 2447 } 2448 } 2449 2450 return BufferedChunk(source, buffer); 2451 } 2452 } 2453 2454 unittest // inputSourceByChunk 2455 { 2456 import tsv_utils.common.unittest_utils; // tsv-utils unit test helpers 2457 import std.file : mkdir, rmdirRecurse; 2458 import std.path : buildPath; 2459 2460 auto testDir = makeUnittestTempDir("tsv_utils_inputSourceByChunk"); 2461 scope(exit) testDir.rmdirRecurse; 2462 2463 import std.algorithm : equal, joiner; 2464 import std.format; 2465 import std..string : representation; 2466 2467 auto charData = "abcde,ßÀß,あめりか物語,012345"; 2468 ubyte[] ubyteData = charData.dup.representation; 2469 2470 ubyte[1024] rawBuffer; // Must be larger than largest bufferSize in tests. 2471 2472 void writeFileData(string filePath, ubyte[] data) 2473 { 2474 import std.stdio; 2475 2476 auto f = filePath.File("wb"); 2477 f.rawWrite(data); 2478 f.close; 2479 } 2480 2481 foreach (size_t dataSize; 0 .. ubyteData.length) 2482 { 2483 auto data = ubyteData[0 .. dataSize]; 2484 auto filePath = buildPath(testDir, format("data_%d.txt", dataSize)); 2485 writeFileData(filePath, data); 2486 2487 foreach (size_t bufferSize; 1 .. dataSize + 2) 2488 { 2489 assert(data.inputSourceByChunk(bufferSize).joiner.equal(data), 2490 format("[Test-A] dataSize: %d, bufferSize: %d", dataSize, bufferSize)); 2491 2492 assert (rawBuffer.length >= bufferSize); 2493 2494 ubyte[] buffer = rawBuffer[0 .. bufferSize]; 2495 assert(data.inputSourceByChunk(buffer).joiner.equal(data), 2496 format("[Test-B] dataSize: %d, bufferSize: %d", dataSize, bufferSize)); 2497 2498 { 2499 auto inputStream = filePath.File; 2500 assert(inputStream.inputSourceByChunk(bufferSize).joiner.equal(data), 2501 format("[Test-C] dataSize: %d, bufferSize: %d", dataSize, bufferSize)); 2502 inputStream.close; 2503 } 2504 2505 { 2506 auto inputStream = filePath.File; 2507 assert(inputStream.inputSourceByChunk(buffer).joiner.equal(data), 2508 format("[Test-D] dataSize: %d, bufferSize: %d", dataSize, bufferSize)); 2509 inputStream.close; 2510 } 2511 } 2512 } 2513 } 2514 2515 @safe unittest // inputSourceByChunk array cases 2516 { 2517 import std.algorithm : equal; 2518 2519 ubyte[5] staticArray = [5, 6, 7, 8, 9]; 2520 const(ubyte)[5] staticArrayConstElts = [5, 6, 7, 8, 9]; 2521 immutable(ubyte)[5] staticArrayImmutableElts = [5, 6, 7, 8, 9]; 2522 const ubyte[5] staticConstArray = [5, 6, 7, 8, 9]; 2523 immutable ubyte[5] staticImmutableArray = [5, 6, 7, 8, 9]; 2524 2525 ubyte[] dynamicArray = [5, 6, 7, 8, 9]; 2526 const(ubyte)[] dynamicArrayConstElts = [5, 6, 7, 8, 9]; 2527 immutable(ubyte)[] dynamicArrayImmutableElts = [5, 6, 7, 8, 9]; 2528 const ubyte[] dynamicConstArray = [5, 6, 7, 8, 9]; 2529 immutable ubyte[] dynamicImmutableArray = [5, 6, 7, 8, 9]; 2530 2531 /* The dynamic mutable arrays can be used directly. */ 2532 assert (dynamicArray.inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2533 assert (dynamicArrayConstElts.inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2534 assert (dynamicArrayImmutableElts.inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2535 2536 /* All the arrays can be used with slicing. */ 2537 assert (staticArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2538 assert (staticArrayConstElts[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2539 assert (staticArrayImmutableElts[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2540 assert (staticConstArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2541 assert (staticImmutableArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2542 assert (dynamicArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2543 assert (dynamicArrayConstElts[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2544 assert (dynamicArrayImmutableElts[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2545 assert (dynamicConstArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2546 assert (dynamicImmutableArray[].inputSourceByChunk(2).equal([[5, 6], [7, 8], [9]])); 2547 } 2548 2549 @safe unittest // inputSourceByChunk input ranges 2550 { 2551 import std.algorithm : equal; 2552 2553 assert (7.repeat!ubyte(5).inputSourceByChunk(1).equal([[7], [7], [7], [7], [7]])); 2554 assert (7.repeat!ubyte(5).inputSourceByChunk(2).equal([[7, 7], [7, 7], [7]])); 2555 assert (7.repeat!ubyte(5).inputSourceByChunk(3).equal([[7, 7, 7], [7, 7]])); 2556 assert (7.repeat!ubyte(5).inputSourceByChunk(4).equal([[7, 7, 7, 7], [7]])); 2557 assert (7.repeat!ubyte(5).inputSourceByChunk(5).equal([[7, 7, 7, 7, 7]])); 2558 assert (7.repeat!ubyte(5).inputSourceByChunk(6).equal([[7, 7, 7, 7, 7]])); 2559 2560 /* Infinite. */ 2561 assert (7.repeat!ubyte.inputSourceByChunk(2).take(3).equal([[7, 7], [7, 7], [7, 7]])); 2562 }