tsv_utils.common.utils source code

1 /**
2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
3 and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
8       an input line. Fields in the subset are accessed by array indicies. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [isFlushableOutputRange] - Tests if something is an OutputRange with a flush
16       member.
17 
18     * [bufferedByLine] - An input range that reads from a File handle line by line.
19       It is similar to the standard library method std.stdio.File.byLine, but quite a
20       bit faster. This is achieved by reading in larger blocks and buffering.
21 
22     * [InputSourceRange] - An input range that provides open file access to a set of
23       files. It is used to iterate over files passed as command line arguments. This
24       enables reading the header line of a file during command line argument processing, then
25       passing the open file to the main processing functions.
26 
27     * [ByLineSourceRange] - Similar to an InputSourceRange, except that it provides
28       access to a byLine iterator (bufferedByLine) rather than an open file. This is
29       used by tools that run the same processing logic on both header and non-header lines.
30 
31     * [isBufferableInputSource] - Tests if a file or input range can be read in a
32       buffered fashion by inputSourceByChunk.
33 
34     * [inputSourceByChunk] - Returns a range that reads from a file handle (File) or
35       a ubyte input range a chunk at a time.
36 
37     * [joinAppend] - A function that performs a join, but appending the join output to
38       an output stream. It is a performance improvement over using join or joiner with
39       writeln.
40 
41     * [getTsvFieldValue] - A convenience function when only a single value is needed
42       from an input line.
43 
44     * [throwIfWindowsNewline] - A utility for detecting Windows newlines in input.
45 )
46 
47 Copyright (c) 2015-2020, eBay Inc.
48 Initially written by Jon Degenhardt
49 
50 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
51 */
52 
53 module tsv_utils.common.utils;
54 
55 import std.range;
56 import std.stdio : File, isFileHandle, KeepTerminator;
57 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned, ReturnType, Unqual;
58 import std.typecons : Flag, No, Yes;
59 
60 // InputFieldReordering class.
61 
/** Flag used by the InputFieldReordering template. */
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Field indicies are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.

The optional `start` constructor argument offsets the output positions: the field listed
first is stored at `outputFields[start]`. The first `start` entries of outputFields are
never assigned and remain empty.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;      // Reordered fields; slices of caller-owned data.
    private TupleFromTo[] fromToMap;    // Full mapping, sorted by input field index.
    private TupleFromTo[] mapStack;     // Portion of fromToMap not yet matched on this line.

    /** Builds the input-to-output mapping.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = Output position assigned to the first listed field. Defaults to zero.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output positions run from start to start + length - 1, so the buffer must
         * cover that whole range. Sizing it by length alone made any non-zero start
         * index past the end of the buffer in processNextField.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Tuples sort by 'from', then 'to', giving input field order. */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* With partial lines enabled, clear values left over from the prior line so
             * that fields not seen on this line read as empty.
             */
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the
     * outputFields array.
     *
     * processNextField should be called once for each field on the line, in the order
     * found. The processing of the line can terminate once allFieldsFilled returns
     * true.
     *
     * The return value is the number of output fields the input field maps to. Zero
     * means the field is not mapped to the output fields array.
     *
     * If, prior to allFieldsFilled returning true, any fields on the input line
     * are not passed to processNextField, the caller should either ensure the fields
     * are not part of the output fields or have partial lines enabled.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;

        /* A field listed several times has consecutive entries in the sorted map. */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
207 
208 // InputFieldReordering - Tests using different character types.
@safe unittest
{
    import std.conv : to;

    /* Three rows of four fields. The second row includes non-ASCII text so that the
     * char, wchar, and dchar instantiations see multi-code-unit characters.
     */
    auto rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                 ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                 ["r3f0", "123",  "456",    "789"]];

    size_t[] selection = [2, 0];

    auto wanted = [["r1f2",   "r1f0"],
                   ["ÀBCßßZ", "r2f0"],
                   ["456",    "r3f0"]];

    char[][][]  wantedChar = wanted.to!(char[][][]);
    wchar[][][] wantedWchar = wanted.to!(wchar[][][]);
    dchar[][][] wantedDchar = wanted.to!(dchar[][][]);
    dstring[][] wantedDstring = wanted.to!(dstring[][]);

    auto narrowIFR = new InputFieldReordering!char(selection);
    auto wideIFR = new InputFieldReordering!wchar(selection);
    auto utf32IFR = new InputFieldReordering!dchar(selection);

    foreach (rowNum, row; rows)
    {
        narrowIFR.initNewLine;
        wideIFR.initNewLine;
        utf32IFR.initNewLine;

        foreach (column, text; row)
        {
            narrowIFR.processNextField(column, text.to!(char[]));
            wideIFR.processNextField(column, text.to!(wchar[]));
            utf32IFR.processNextField(column, text.to!(dchar[]));

            /* Field 2 is the highest index selected, so filling completes there. */
            immutable bool shouldBeComplete = column >= 2;
            assert(narrowIFR.allFieldsFilled == shouldBeComplete);
            assert(wideIFR.allFieldsFilled == shouldBeComplete);
            assert(utf32IFR.allFieldsFilled == shouldBeComplete);
        }

        assert(narrowIFR.allFieldsFilled);
        assert(wideIFR.allFieldsFilled);
        assert(utf32IFR.allFieldsFilled);

        assert(narrowIFR.outputFields == wantedChar[rowNum]);
        assert(wideIFR.outputFields == wantedWchar[rowNum]);
        assert(utf32IFR.outputFields == wantedDchar[rowNum]);
    }
}
257 
258 // InputFieldReordering - Test of partial line support.
@safe unittest
{
    import std.conv : to;

    auto rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                 ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                 ["r3f0", "123",  "456",    "789"]];

    size_t[] selection = [2, 0];

    /* Snapshot of outputFields expected after each field of each row is processed.
     * Output slot 0 (input field 2) stays empty until the third field arrives.
     */
    auto snapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ];

    char[][][][] charSnapshots = snapshots.to!(char[][][][]);

    auto partialIFR = new InputFieldReordering!(char, EnablePartialLines.yes)(selection);

    foreach (rowNum, row; rows)
    {
        partialIFR.initNewLine;

        foreach (column, text; row)
        {
            partialIFR.processNextField(column, text.to!(char[]));
            assert(partialIFR.outputFields == charSnapshots[rowNum][column]);
        }
    }
}
291 
292 // InputFieldReordering - Field combination tests.
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* One scenario: the fields to select, and the reordered output expected for each
     * of the three input lines.
     */
    static struct Scenario
    {
        size_t[] fields;
        string[][] expected;
    }

    auto scenarios = [
        Scenario([0],
                 [["00"],
                  ["10"],
                  ["20"]]),
        Scenario([3],
                 [["03"],
                  ["13"],
                  ["23"]]),
        Scenario([0, 1],
                 [["00", "01"],
                  ["10", "11"],
                  ["20", "21"]]),
        Scenario([1, 0],
                 [["01", "00"],
                  ["11", "10"],
                  ["21", "20"]]),
        Scenario([0, 3],
                 [["00", "03"],
                  ["10", "13"],
                  ["20", "23"]]),
        Scenario([3, 0],
                 [["03", "00"],
                  ["13", "10"],
                  ["23", "20"]]),
        Scenario([0, 1, 2, 3],
                 [["00", "01", "02", "03"],
                  ["10", "11", "12", "13"],
                  ["20", "21", "22", "23"]]),
        Scenario([3, 2, 1, 0],
                 [["03", "02", "01", "00"],
                  ["13", "12", "11", "10"],
                  ["23", "22", "21", "20"]]),
        Scenario([0, 3, 0, 0, 1],
                 [["00", "03", "00", "00", "01"],
                  ["10", "13", "10", "10", "11"],
                  ["20", "23", "20", "20", "21"]]),
    ];

    foreach (ref scenario; scenarios)
    {
        auto reordering = new InputFieldReordering!char(scenario.fields);
        auto expected = scenario.expected.to!(char[][][]);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            /* Every field on the line is passed through, with no early exit. */
            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, fieldValue.to!(char[]));
            }

            assert(reordering.outputFields == expected[lineIndex]);
        }
    }
}
394 
395 /**
396 BufferedOutputRange is a performance enhancement over writing directly to an output
397 stream. It holds a File open for write or an OutputRange. Output is accumulated in an
398 internal buffer and written to the output stream as a block.
399 
400 Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
401 than writing to stdout directly. This is especially noticeable for outputs with short
402 lines, as it blocks many writes together in a single write.
403 
404 The internal buffer is written to the output stream after flushSize has been reached.
405 This is checked at newline boundaries, when appendln is called or when put is called
406 with a single newline character. Other writes check maxSize, which is used to avoid
407 runaway buffers.
408 
409 BufferedOutputRange has a put method allowing it to be used as a range. It has a number
410 of other methods providing additional control.
411 
412 $(LIST
413     * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
414       output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.
415 
416     * `append(stuff)` - Append to the internal buffer.
417 
418     * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
419       is flushed to the output stream if it has reached flushSize.
420 
421     * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
422       output stream if it has reached flushSize.
423 
424     * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
425       For reasons that are not clear, joiner is quite slow.
426 
427     * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
428       reached.
429 
430     * `flush()` - Write the internal buffer to the output stream.
431 
432     * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
433       newline character, '\n' or "\n".
434 )
435 
436 The internal buffer is automatically flushed when the BufferedOutputRange goes out of
437 scope.
438 */
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Constructor.
     *
     * Params:
     *   outputTarget = The File or output range receiving the buffered output.
     *   flushSize = Buffer length at which newline-boundary writes trigger a flush.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which a flush occurs regardless of newlines. Must
     *     be at least flushSize; if it is not (and asserts are disabled), flushSize is
     *     used as the max size.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /* Flush any remaining buffered output when the object is destroyed. */
    ~this()
    {
        flush();
    }

    /** Writes the internal buffer to the output target and empties the buffer. */
    void flush()
    {
        static if (isFileHandle!OutputTarget) _outputTarget.rawWrite(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. There is nothing to write, and it has no last
     * element to inspect. Without this guard, a flush size of zero combined with an
     * empty buffer (e.g. append("") as the first write) indexed past the end of the
     * buffer.
     */
    private bool maybeFlush()
    {
        immutable size_t bufferLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferLength > 0 &&
            bufferLength >= _flushSize &&
            (_outputBuffer.data[bufferLength - 1] == '\n' || bufferLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* Appends to the internal buffer without any flush checks. */
    private void appendRaw(T)(T stuff) pure
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer. The buffer is flushed if it has reached
     * flushSize and ends in a newline, or if it has reached maxSize.
     */
    void append(T)(T stuff)
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline to the internal buffer, flushing if flushSize has been
     * reached. Returns true if a flush occurred.
     */
    bool appendln()
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, flushing if flushSize has been reached.
     * Returns true if a flush occurred.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     *
     * Only the max size check is made afterward; flushSize is not considered.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a preceding delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range.
     *
     * A single newline, as a character or as a string, acts as appendln(). Other
     * single characters are buffered without a flush check. Everything else goes
     * through append.
     */
    void put(T)(T stuff)
    {
        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
584 
585 // BufferedOutputRange.
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.algorithm : map, joiner;
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : iota;
    import std.stdio : File;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    /* Writes the two-line sample used by the basic scenarios, preceded by a label. */
    void writeSample(Stream)(ref Stream stream, string label)
    {
        stream.append(label);
        stream.append("abc");
        stream.append(["def", "ghi", "jkl"]);
        stream.appendln("100");
        stream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        stream.appendln();
    }

    /* What writeSample produces after the label. */
    enum sampleText = "abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n";

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        auto buffered = BufferedOutputRange!File(filepath1.File("wb"));
        writeSample(buffered, "file1: ");
    }
    assert(filepath1.readText == "file1: " ~ sampleText);

    /* Test with no reserve and a flush size of zero. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        auto buffered = BufferedOutputRange!File(filepath2.File("wb"), 0, 0);
        writeSample(buffered, "file2: ");
    }
    assert(filepath2.readText == "file2: " ~ sampleText);

    /* With a locking text writer. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            auto ltw = filepath3.File("wb").lockingTextWriter;
            {
                auto buffered = BufferedOutputRange!(typeof(ltw))(ltw);
                writeSample(buffered, "file3: ");
            }
        }
        assert(filepath3.readText == "file3: " ~ sampleText);
    }

    /* With an Appender. */
    auto app1 = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app1))(app1);
        writeSample(buffered, "appender1: ");
    }
    assert(app1.data == "appender1: " ~ sampleText);

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
        bool flushed = false;

        assert(app2.data == "");

        buffered.append("12345678"); // Not flushed yet.
        assert(app2.data == "");

        flushed = buffered.appendln;  // Ninth char, not flushed yet.
        assert(!flushed);
        assert(app2.data == "");

        flushed = buffered.appendln;  // Tenth char, now flushed.
        assert(flushed);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        buffered.append("12345678");

        flushed = buffered.flushIfFull;
        assert(!flushed);
        assert(app2.data == "");

        buffered.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        /* Past flush size, but no trailing newline and below max size. */
        buffered.append("123456789012345");
        assert(app2.data == "");
    }
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app1b))(app1b);
        buffered.append("appenderB: ");
        buffered.joinAppend(["a", "bc", "def"], '-');
        buffered.append(':');
        buffered.joinAppend(["g", "hi", "jkl"], '-');
        buffered.appendln("*100*");
        buffered.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        buffered.append(' ');
        buffered.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        buffered.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    void emitViaRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    void emitViaCopy(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        emitViaRef(buffered);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        emitViaRef(buffered);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        emitViaCopy(buffered);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        emitViaCopy(buffered);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto buffered = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        buffered.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        buffered.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        buffered.put("1234567");     // Not flushed yet (no newline).
        assert(app5.data == "123456789012");

        buffered.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        buffered.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        buffered.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
793 
794 /**
795 isFlushableOutputRange returns true if R is an output range with a flush member.
796 */
template isFlushableOutputRange(R, E = char)
{
    /* Flushable means: an output range for E whose flush member, called with no
     * arguments, returns void.
     */
    static if (isOutputRange!(R, E))
    {
        enum bool isFlushableOutputRange = is(ReturnType!((R r) => r.flush) == void);
    }
    else
    {
        enum bool isFlushableOutputRange = false;
    }
}
799 
@safe unittest
{
    import std.array;

    /* Live instances are created so the checks can also be run against typeof()
     * of real objects, not only against spelled-out type names.
     */
    auto app = appender!(char[]);
    auto ostream = BufferedOutputRange!(typeof(app))(app, 5, 0, 10); // maxSize 10

    alias AppType = typeof(app);
    alias StreamType = typeof(ostream);

    /* Appenders are output ranges, but are not flushable. */
    static assert(isOutputRange!(AppType, char));
    static assert(!isFlushableOutputRange!(AppType, char));
    static assert(!isFlushableOutputRange!AppType);

    static assert(isOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string));

    static assert(isOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[])));

    /* A BufferedOutputRange wrapping an appender is both an output range and flushable. */
    static assert(isOutputRange!(StreamType, char));
    static assert(isFlushableOutputRange!(StreamType, char));
    static assert(isFlushableOutputRange!StreamType);

    static assert(isOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[]))));
}
826 
827 
/**
bufferedByLine is a faster alternative to std.stdio.File.byLine. Instead of reading
a single line at a time, it reads large blocks from the input stream into an internal
buffer and returns lines as slices of that buffer.

The file argument must be a File object open for reading, typically a filesystem file
or standard input. Use the Yes.keepTerminator template parameter to keep the line
terminator. This is similar to std.stdio.File.byLine, except that it is specified as a
template parameter rather than a runtime parameter.

Template parameters:
$(LIST
    * `keepTerminator` - Whether returned lines include the terminator.
    * `Char` - Element type of the returned lines, either char or ubyte.
    * `terminator` - The byte marking the end of a line.
    * `readSize` - Number of bytes requested from the file in each read.
    * `growSize` - Increment used when the buffer has to be enlarged. Must be greater
      than zero and no larger than readSize.
)

Each line returned by `front` is a slice of the internal buffer. A later `popFront`
may move or overwrite that data, so copy a line if it needs to be retained.

Reading in blocks does mean that input is not read until a full buffer is available or
end-of-file is reached. For this reason, bufferedByLine is not appropriate for
interactive input.
*/
auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer bookkeeping:
         *   - _buffer.length - Allocated size of the buffer.
         *   - _dataEnd - One past the last valid byte (end of the most recent read).
         *   - _lineStart - Index of the first byte of the current line.
         *   - _lineEnd - One past the last byte of the current line, terminator included.
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        this (File f)
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        bool empty() const pure
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        Char[] front() pure
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                immutable sliceEnd = _lineEnd;
            }
            else
            {
                assert(_lineStart < _lineEnd);

                /* The last line of a file may have no terminator to strip. */
                immutable bool endsWithTerminator = _buffer[_lineEnd - 1] == terminator;
                immutable sliceEnd = endsWithTerminator ? _lineEnd - 1 : _lineEnd;
            }

            return cast(Char[]) _buffer[_lineStart .. sliceEnd];
        }

        /* Searches _buffer[searchFrom .. _dataEnd] for the terminator and updates
         * _lineEnd: one past the terminator if found, otherwise _dataEnd. Returns
         * true if a terminator was found.
         */
        private bool setLineEnd(size_t searchFrom)
        {
            import std.algorithm : find;

            /* 'find' returns the slice starting at the terminator, or an empty slice. */
            auto tail = _buffer[searchFrom .. _dataEnd].find(terminator);
            _lineEnd = tail.empty ? _dataEnd : _dataEnd - tail.length + 1;
            return !tail.empty;
        }

        /* Note: Call popFront at initialization to do the initial read. */
        void popFront()
        {
            import std.algorithm : copy;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Drop the current line, then look for the end of the next one in the
             * data already buffered.
             */
            _lineStart = _lineEnd;
            bool terminatorFound = setLineEnd(_lineStart);

            /* Done if a complete line is buffered or the file has nothing more. In
             * the latter case _lineEnd == _dataEnd, which covers a final line
             * without a terminator.
             */
            if (terminatorFound || _file.eof) return;

            assert(_lineEnd == _dataEnd);

            if (_lineStart > 0)
            {
                /* Slide the partial line to the front of the buffer to make room. */
                immutable partialLength = _dataEnd - _lineStart;
                copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. partialLength]);
                _lineStart = 0;
                _lineEnd = _dataEnd = partialLength;
            }

            /* Keep reading blocks until a terminator shows up or the file ends. */
            do
            {
                /* Make sure there is room for a full readSize block. */
                immutable spareSize = _buffer.length - _dataEnd;
                if (spareSize < readSize)
                {
                    size_t extraSize = growSize;
                    while (spareSize + extraSize < readSize) extraSize += growSize;
                    _buffer.length += extraSize;
                }

                auto block = _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize]);
                _dataEnd += block.length;

                /* Only the newly read bytes need to be searched; _lineEnd still
                 * marks the end of the data that was already scanned.
                 */
                terminatorFound = setLineEnd(_lineEnd);

            } while (!terminatorFound && !_file.eof);
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    auto lines = new BufferedByLineImpl(file);
    if (!lines.empty) lines.popFront;
    return lines;
}
957 
// BufferedByLine.
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    /* Creates (or overwrites) the file at 'path' with exactly 'content'. */
    static void writeTestFile(string path, const(char)[] content)
    {
        auto fh = File(path, "wb");
        fh.write(content);
        fh.close;
    }

    /* Reads 'bufferedPath' with bufferedByLine and 'byLinePath' with File.byLine and
     * asserts that both produce the same lines and the same number of lines.
     *
     * 'bufferedArgs' are forwarded as the bufferedByLine template arguments that
     * follow keepTerminator. Passing none exercises bufferedByLine's defaults.
     */
    static void assertSameAsByLine(KeepTerminator keepTerminator, bufferedArgs...)
        (string bufferedPath, string byLinePath)
    {
        auto bufferedFH = File(bufferedPath);
        auto byLineFH = File(byLinePath);
        auto bufferedIn = bufferedByLine!(keepTerminator, bufferedArgs)(bufferedFH);
        auto byLineIn = byLineFH.byLine(keepTerminator);

        foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);

        bufferedFH.close;
        byLineFH.close;
    }

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     *
     * The data is 1000 empty lines, 1000 one-character lines, the numbers 1 to 1000,
     * and lines of 'x' characters with lengths 1 to 1000.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    writeTestFile(file1a, data1.data);
    writeTestFile(file1b, data1.data);

    /* Default parameters. */
    assertSameAsByLine!(No.keepTerminator)(file1a, file1b);
    assertSameAsByLine!(Yes.keepTerminator)(file1a, file1b);

    /* Smaller read size. This will trigger buffer growth. */
    assertSameAsByLine!(No.keepTerminator, char, '\n', 512, 256)(file1a, file1b);

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertSameAsByLine!(No.keepTerminator, char, '\n', readSize, growSize)(file1a, file1b);
        }
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertSameAsByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(file1a, file1b);
        }
    }

    /* Files that do not end in a newline. The file1 pair is overwritten with new
     * content; files 2 and 3 are created here.
     */
    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");

    writeTestFile(file1a, "a");
    writeTestFile(file1b, "a");
    writeTestFile(file2a, "ab");
    writeTestFile(file2b, "ab");
    writeTestFile(file3a, "abc");
    writeTestFile(file3b, "abc");

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertSameAsByLine!(No.keepTerminator, char, '\n', readSize, growSize)(file1a, file1b);
            assertSameAsByLine!(No.keepTerminator, char, '\n', readSize, growSize)(file2a, file2b);
            assertSameAsByLine!(No.keepTerminator, char, '\n', readSize, growSize)(file3a, file3b);
        }
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertSameAsByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(file1a, file1b);
            assertSameAsByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(file2a, file2b);
            assertSameAsByLine!(Yes.keepTerminator, char, '\n', readSize, growSize)(file3a, file3b);
        }
    }
}
1175 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

The elements of `inputRange` are written to `outputRange` in order, with `delimiter`
written between consecutive elements. Nothing is written for an empty input range.
Existing content of the output range is left in place; the joined data is appended.

Params:
    inputRange = Elements to join. Either a range of E (e.g. int[] with an int
                 delimiter) or a range of arrays of E (e.g. string[] with a char
                 delimiter).
    outputRange = Destination, passed by reference. Must accept the element type of
                  inputRange.
    delimiter = Value written between elements.

Returns: The output range (returned by value), so calls can be chained, e.g.
`joinAppend(...).data` with an Appender.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses of joinAppend have been replaced by BufferedOutputRange, which
has its own joinAppend method. However, joinAppend remains useful when constructing
internal buffers where BufferedOutputRange is not appropriate.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')));
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* Both alternatives are grouped explicitly. '&&' binds tighter than '||', so
     * without the outer parentheses isInputRange would guard only the first one.
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
    )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1249 
// joinAppend.
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    /* Ranges of arrays: mutable char arrays and strings joined into a char appender. */
    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));   // The input is not modified.

    /* Output is appended to what the appender already holds. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));   // The input is not modified.

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    /* Ranges of int arrays and ranges of ints joined into an int appender. */
    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* No delimiter is written between the output of separate joinAppend calls. */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1299 
/**
getTsvFieldValue returns the value of one field of a delimited text line, converted
to type T.

It is a convenience function for cases where only a single field from an input line
is needed. When several fields are needed it is more efficient to work directly with
std.algorithm.splitter or the InputFieldReordering class.

Params:
    line = The text to split.
    fieldIndex = Zero-based index of the field to extract.
    delim = The field delimiter character.

Returns: The requested field converted to T with std.conv.to.

Throws: std.conv.ConvException if the conversion fails. An Exception if the line has
too few fields. In the latter case the message uses 1-upped field numbers, as would
be provided by command line users.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Skip the fields preceding the requested one, counting how many were present. */
    size_t numSkipped = 0;
    for (; numSkipped < fieldIndex && !fields.empty; ++numSkipped) fields.popFront;

    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero was requested, but splitter produced no fields at all. This is a
     * workaround to a splitter special case - If the input is empty, the returned
     * split range is empty. This doesn't properly represent a single column file.
     * More correct mathematically, and for this case, would be a single value
     * representing an empty string. The input line is a convenient source of an
     * empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1362 
// getTsvFieldValue.
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;
    import std.meta : AliasSeq;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);

    /* Multi-byte characters in the fields preceding and including the target. */
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. The same line is presented as each of the
     * supported character array types.
     */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";

    static foreach (LineType; AliasSeq!(string, char[], dchar[], wchar[]))
    {{
        LineType typedLine = stringLine.to!LineType;

        assert(getTsvFieldValue!string(typedLine, 0, '\t') == "orange and black");
        assert(getTsvFieldValue!string(typedLine, 1, '\t') == "ნარინჯისფერი და შავი");
        assert(getTsvFieldValue!wstring(typedLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
        assert(getTsvFieldValue!double(typedLine, 2, '\t') == 88.5);
    }}

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1427 
/**
Yes|No.newlineWasRemoved is a template parameter to throwIfWindowsNewline. A Yes
value indicates the Unix newline was already removed, as might be done via
std.stdio.File.byLine or a similar mechanism.
*/
alias NewlineWasRemoved = Flag!("newlineWasRemoved");

/**
throwIfWindowsNewline throws an exception if the 'line' argument ends with a
Windows/DOS line ending. This is used by TSV Utilities tools to detect Windows/DOS
line endings and terminate processing with an error message to the user.

The 'nlWasRemoved' template parameter says whether the Unix newline character was
already removed from 'line'. With Yes.newlineWasRemoved (the default) a trailing CR is
treated as the remainder of a Windows CRLF. With No.newlineWasRemoved the line must
end in the full CRLF sequence. The Yes form is useful when reading files in binary
mode, stripping Unix newlines.

Params:
    line = The line to check.
    filename = Name of the input, used in the error message. "-" is reported as
               "Standard Input".
    lineNum = Line number reported in the error message.

Throws: Exception if a Windows/DOS line ending is found.
*/
void throwIfWindowsNewline
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    static if (nlWasRemoved)
    {
        /* Only the CR of a CRLF pair can remain once the LF has been stripped. */
        immutable bool windowsEnding = line.length >= 1 && line[$ - 1] == '\r';
    }
    else
    {
        immutable bool windowsEnding =
            line.length >= 2 &&
            line[$ - 1] == '\n' &&
            line[$ - 2] == '\r';
    }

    if (!windowsEnding) return;

    import std.format;

    const(char)[] sourceName = (filename == "-") ? "Standard Input" : filename;
    throw new Exception(
        format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
               sourceName, lineNum));
}
1468 
// throwIfWindowsNewline
@safe unittest
{
    import std.algorithm : endsWith;
    import std.exception;

    /* Lines of length zero through three. Line numbers are arbitrary test values. */
    immutable string[] bodies = ["", "a", "ab", "abc"];

    /* Newline already removed (the default). Only a trailing CR should throw. */
    foreach (i, text; bodies)
    {
        assertNotThrown(throwIfWindowsNewline(text, "afile.tsv", i + 1));
    }
    foreach (i, text; bodies)
    {
        assertThrown(throwIfWindowsNewline(text ~ "\r", "afile.tsv", i + 1));
    }

    /* Newline still present. Only a trailing CRLF should throw. */
    foreach (i, text; bodies)
    {
        assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)(text ~ "\n", "afile.tsv", i + 1));
    }
    foreach (i, text; bodies)
    {
        assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)(text ~ "\r\n", "afile.tsv", i + 5));
    }

    /* Standard Input formatting. A filename of "-" must be reported as "Standard
     * Input". The message stays null if no exception is thrown, failing the assert.
     */
    string stdinMsg = null;
    try
    {
        throwIfWindowsNewline("\r", "-", 99);
    }
    catch (Exception e)
    {
        stdinMsg = e.msg;
    }
    assert(stdinMsg !is null);
    assert(stdinMsg.endsWith("File: Standard Input, Line: 99"));

    stdinMsg = null;
    try
    {
        throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "-", 99);
    }
    catch (Exception e)
    {
        stdinMsg = e.msg;
    }
    assert(stdinMsg !is null);
    assert(stdinMsg.endsWith("File: Standard Input, Line: 99"));
}
1522 
/** Flag used by InputSourceRange to determine if the header line should be read when
opening a file.
*/
alias ReadHeader = Flag!("readHeader");
1527 
/**
inputSourceRange is a helper function for creating new InputSourceRange objects.

It forwards both arguments to the InputSourceRange constructor and returns the newly
allocated object.
*/
InputSourceRange inputSourceRange(string[] filepaths, ReadHeader readHeader)
{
    auto sources = new InputSourceRange(filepaths, readHeader);
    return sources;
}
1535 
/**
InputSourceRange is an input range that iterates over a set of input files.

InputSourceRange is used to iterate over a set of files passed on the command line.
Files are automatically opened and closed during iteration. The caller can choose to
have header lines read automatically.

The range is created from a set of filepaths. These filepaths are mapped to
InputSource objects during the iteration. This is what enables automatically opening
and closing files and reading the header line. The first file is opened when the range
is constructed. Each call to popFront closes the current file and opens the next.

The motivation for an InputSourceRange is to provide a standard way to look at the
header line of the first input file during command line argument processing, and then
pass the open input file and the header line along to the main processing functions.
This enables features like named fields to be implemented in a standard way.

Both InputSourceRange and InputSource are reference objects. This keeps their use
limited to a single iteration over the set of files. The files can be iterated again
by creating a new InputSourceRange against the same filepaths.

Currently, InputSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.
 */
final class InputSourceRange
{
    private string[] _filepaths;      // Paths not yet opened.
    private ReadHeader _readHeader;   // Passed to every InputSource created.
    private InputSource _front;       // Currently open source, null when exhausted.

    /* The filepaths array is copied, so the caller's array is not modified by
     * iteration. The first file, if any, is opened immediately.
     */
    this(string[] filepaths, ReadHeader readHeader)
    {
        _filepaths = filepaths.dup;
        _readHeader = readHeader;
        openNextSource();
    }

    /* Number of sources remaining, including the current front. */
    size_t length() const pure nothrow @safe
    {
        if (empty) return 0;
        return _filepaths.length + 1;
    }

    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    InputSource front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty InputSourceRange");
        return _front;
    }

    /* Closes the current source and moves on to the next filepath, if there is one. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty InputSourceRange");

        _front.close;
        openNextSource();
    }

    /* Makes the next filepath the front element: creates its InputSource, opens it,
     * and removes the path from the pending list. Sets the front to null when no
     * filepaths remain.
     */
    private void openNextSource()
    {
        if (_filepaths.empty)
        {
            _front = null;
            return;
        }

        _front = new InputSource(_filepaths.front, _readHeader);
        _front.open;
        _filepaths.popFront;
    }
}
1613 
/**
InputSource is the element type of an InputSourceRange.

An InputSource represents one input file (or standard input). It is open for
reading while it is the front element of the InputSourceRange that created it.
The members most useful to application code are:

$(LIST
    * `file()` - The underlying File object. It is open for reading as long as the
      InputSource is the front element of its InputSourceRange.

    * `header(KeepTerminator keepTerminator = No.keepTerminator)` - The header line
      read from the file. Empty when the range was created with `No.readHeader`.

    * `name()` - A name for the source suitable for user error messages: the
      filepath given to InputSourceRange, or "Standard Input".
)

InputSource is a class, so every copy of a reference refers to the same object and
observes the same open/closed state.

Construction, opening, and closing are private; instances are meant to be created
and managed by InputSourceRange only.
*/
final class InputSource
{
    import std.stdio;

    private immutable string _filepath;
    private immutable bool _isStdin;
    private bool _isOpen;
    private ReadHeader _readHeader;
    private bool _hasBeenOpened;
    private string _header;
    private File _file;

    /* Records the filepath and header option. The filepath "-" denotes standard
     * input. Nothing is opened here; see open().
     */
    private this(string filepath, ReadHeader readHeader) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = (filepath == "-");
        _readHeader = readHeader;
        _hasBeenOpened = false;
        _isOpen = false;
    }

    /** Returns the File object held by this InputSource.
     *
     * The File is open for reading while this InputSource is the front element of
     * the InputSourceRange it came from.
     */
    File file() nothrow @safe
    {
        return _file;
    }

    /** Returns true if this source was created with `Yes.readHeader`. */
    bool isReadHeaderEnabled() const pure nothrow @safe
    {
        return _readHeader == Yes.readHeader;
    }

    /** Returns the header line read when the source was opened.
     *
     * With `No.keepTerminator` (the default) a single trailing '\n' is removed.
     * The result is empty if header reading is disabled. The source must have
     * been opened.
     */
    string header(KeepTerminator keepTerminator = No.keepTerminator) const pure nothrow @safe
    {
        assert(_hasBeenOpened);

        immutable bool endsWithNewline = _header.length != 0 && _header[$ - 1] == '\n';

        if (keepTerminator == No.keepTerminator && endsWithNewline)
        {
            return _header[0 .. $ - 1];
        }

        return _header;
    }

    /** Returns true if no header data at all was read, not even a terminator.
     *
     * When headers are being read this happens only for an empty file. The source
     * must have been opened.
     */
    bool isHeaderEmpty() const pure nothrow @safe
    {
        assert(_hasBeenOpened);
        return _header.length == 0;
    }

    /** Returns a user friendly name for the input source.
     *
     * This is the filepath given to InputSourceRange, or "Standard Input" for
     * standard input. (Use isStdin() to test for standard input, not name().)
     */
    string name() const pure nothrow @safe
    {
        if (_isStdin) return "Standard Input";
        return _filepath;
    }

    /** Returns true if the input source is standard input. */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** Returns true while this InputSource is open for reading.
     *
     * This is the state of the InputSource object itself: it is open while it is
     * the front element of its InputSourceRange. Regular files are closed along
     * with the InputSource; standard input is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the source (at most once per instance) and, when enabled, reads the
     * header line including its terminator.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        if (_isStdin)
        {
            _file = stdin;
        }
        else
        {
            _file = File(_filepath, "rb");
        }

        if (isReadHeaderEnabled) _header = _file.readln();

        _hasBeenOpened = true;
        _isOpen = true;
    }

    /* Marks the source closed. The underlying File is closed too, unless it is
     * standard input.
     */
    private void close()
    {
        if (!_isStdin)
        {
            _file.close();
        }

        _isOpen = false;
    }
}
1750 
// InputSourceRange and InputSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    /* Fixture contents, indexed by file number. File 0 is completely empty, file 1
     * has only a header line, files 2 and 3 have a header plus one and two lines.
     */
    string[] fileHeaders = ["", "file 1 header\n", "file 2 header\n", "file 3 header\n"];
    string[] fileBodies = ["", "", "file 2 line 1\n", "file 3 line 1\nfile 3 line 2\n"];

    string[] inputFiles;
    string[] fileData;

    foreach (i, fileName; ["file0.txt", "file1.txt", "file2.txt", "file3.txt"])
    {
        inputFiles ~= buildPath(testDir, fileName);
        fileData ~= fileHeaders[i] ~ fileBodies[i];
        File(inputFiles[i], "wb").write(fileData[i]);
    }

    auto readSources = appender!(InputSource[]);
    auto buffer = new char[1024];    // Must be large enough to hold the test files.

    /* Iterates the first numFiles fixture files and checks every InputSource
     * property, with or without header reading.
     */
    void checkFileSources(size_t numFiles, ReadHeader readHeader)
    {
        immutable bool headersRead = (readHeader == Yes.readHeader);

        readSources.clear;
        auto sources = inputSourceRange(inputFiles[0 .. numFiles], readHeader);
        assert(sources.length == numFiles);

        foreach (fileNum, source; sources.enumerate)
        {
            readSources.put(source);

            /* Only the current source is open; all earlier ones were closed. */
            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            if (headersRead)
            {
                string fullHeader = fileHeaders[fileNum];
                string choppedHeader =
                    fullHeader.length > 0 ? fullHeader[0 .. $ - 1] : fullHeader;

                assert(source.header(Yes.keepTerminator) == fullHeader);
                assert(source.header(No.keepTerminator) == choppedHeader);
            }
            else
            {
                assert(source.header(Yes.keepTerminator).empty);
                assert(source.header(No.keepTerminator).empty);
            }

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(source.isReadHeaderEnabled == headersRead);

            /* What is left to read is the body if the header was consumed, and
             * the whole file otherwise.
             */
            string expectedRemainder = headersRead ? fileBodies[fileNum] : fileData[fileNum];
            assert(source.file.rawRead(buffer) == expectedRemainder);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(sources.empty);
    }

    /* Tests without standard input. Unit tests should neither depend on nor modify
     * the state of standard input, so it is never read from here.
     */
    foreach (numFiles; 1 .. inputFiles.length + 1)
    {
        checkFileSources(numFiles, Yes.readHeader);
        checkFileSources(numFiles, No.readHeader);
    }

    /* Tests with standard input. No actual reading in these tests. */
    readSources.clear;
    auto stdinSources = inputSourceRange(["-", "-"], No.readHeader);

    foreach (fileNum, source; stdinSources.enumerate)
    {
        readSources.put(source);
        auto earlierSources = readSources.data[0 .. fileNum];

        assert(source.isOpen);
        assert(source.file.isOpen);
        assert(earlierSources.all!(s => !s.isOpen));      // InputSource objects are "closed".
        assert(earlierSources.all!(s => s.file.isOpen));  // Actual stdin should not be closed.
        assert(readSources.data[fileNum].isOpen);

        assert(source.header(Yes.keepTerminator).empty);
        assert(source.header(No.keepTerminator).empty);

        assert(source.name == "Standard Input");
        assert(source.isStdin);
    }

    /* Empty filelist. */
    string[] nofiles;
    foreach (readHeader; [No.readHeader, Yes.readHeader])
    {
        auto sources = inputSourceRange(nofiles, readHeader);
        assert(sources.empty);
    }

    /* Error cases: a missing file must throw, whether it is first or later. */
    assertThrown(inputSourceRange([file0Path(), "no_such_file.txt"], No.readHeader).each);
    assertThrown(inputSourceRange(["no_such_file.txt", inputFiles[1]], Yes.readHeader).each);

    /* Helper kept trivial so the error cases read like the fixture list. */
    string file0Path() { return inputFiles[0]; }
}
1901 
/**
byLineSourceRange is a convenience function for creating a new ByLineSourceRange
object. The template arguments are forwarded unchanged to ByLineSourceRange and the
filepaths are passed to its constructor.
*/
auto byLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
(string[] filepaths)
if (is(Char == char) || is(Char == ubyte))
{
    alias RangeType = ByLineSourceRange!(keepTerminator, Char, terminator);
    return new RangeType(filepaths);
}
1912 
/**
ByLineSourceRange is an input range that iterates over a set of input files. It
provides bufferedByLine access to each file.

A ByLineSourceRange is used to iterate over a set of files passed on the command line.
Files are automatically opened and closed during iteration. The front element of the
range provides access to a bufferedByLine for iterating over the lines in the file.

The range is created from a set of filepaths. These filepaths are mapped to
ByLineSource objects during the iteration. This is what enables automatically opening
and closing files and providing bufferedByLine access.

The motivation behind ByLineSourceRange is to provide a standard way to look at the
header line of the first input file during command line argument processing, and then
pass the open input file along to the main processing functions. This enables
features like named fields to be implemented in a standard way.

Access to the first line of the first file is available after creating the
ByLineSourceRange instance: the constructor opens the first file and creates its
bufferedByLine. The first line is then available as `front.byLine.front` (after
checking `!front.byLine.empty`).

Both ByLineSourceRange and ByLineSource are reference objects. This keeps their use
limited to a single iteration over the set of files. The files can be iterated again
by creating a new ByLineSourceRange against the same filepaths.

Currently, ByLineSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.

Template parameters:
$(LIST
    * `keepTerminator` - Whether lines produced by the sources keep their terminator.
    * `Char` - Element type of the lines, `char` or `ubyte`.
    * `terminator` - The line terminator byte.
)
 */
final class ByLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    /* Forward Char rather than hard-coding char, so that a range instantiated with
     * Char = ubyte produces sources of the requested character type.
     */
    alias ByLineSourceType = ByLineSource!(keepTerminator, Char, terminator);

    private string[] _filepaths;
    private ByLineSourceType _front;

    /** Creates the range from a private copy of `filepaths` and opens the first
     * file, if there is one.
     */
    this(string[] filepaths)
    {
        _filepaths = filepaths.dup;
        _front = null;

        if (!_filepaths.empty)
        {
            _front = new ByLineSourceType(_filepaths.front);
            _front.open;
            _filepaths.popFront;
        }
    }

    /** Number of sources left in the range, counting the current front element. */
    size_t length() const pure nothrow @safe
    {
        return empty ? 0 : _filepaths.length + 1;
    }

    /** True once every source has been popped (or if no filepaths were given). */
    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    /** The currently open ByLineSource. The range must not be empty. */
    ByLineSourceType front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty ByLineSourceRange");
        return _front;
    }

    /** Closes the current source and opens the next one, if any remain. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty ByLineSourceRange");

        _front.close;

        if (!_filepaths.empty)
        {
            _front = new ByLineSourceType(_filepaths.front);
            _front.open;
            _filepaths.popFront;
        }
        else
        {
            _front = null;
        }
    }
}
1997 
/**
ByLineSource is a class of objects produced by iterating over a ByLineSourceRange.

A ByLineSource instance provides a bufferedByLine range for the current front
element of a ByLineSourceRange. The main methods application code is likely to
need are:

$(LIST
    * `byLine()` - Returns the bufferedByLine range accessing the open file. The file
       will be open for reading (using the bufferedByLine range) as long as the
       ByLineSource instance is the front element of the ByLineSourceRange
       it came from.

    * `name()` - The name of the input source. The name returned is intended for
      user error messages. For files, this is the filepath that was passed to
      ByLineSourceRange. For standard input, it is "Standard Input".
)

A ByLineSource is a reference object, so the copies have the same state as the
ByLineSourceRange front element. In particular, all copies will have the open
state of the front element of the ByLineSourceRange.

This class is not intended for use outside the context of a ByLineSourceRange.

Template parameters:
$(LIST
    * `keepTerminator` - Whether lines returned by `byLine()` keep their terminator.
    * `Char` - Element type of the lines, `char` or `ubyte`.
    * `terminator` - The line terminator byte.
)
*/
final class ByLineSource(
    KeepTerminator keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.stdio;
    import std.traits : ReturnType;

    /* Forward Char rather than hard-coding char, so that instantiating with
     * Char = ubyte yields a line range of the requested character type.
     */
    alias newByLineFn = bufferedByLine!(keepTerminator, Char, terminator);
    alias ByLineType = ReturnType!newByLineFn;

    private immutable string _filepath;
    private immutable bool _isStdin;
    private bool _isOpen;
    private bool _hasBeenOpened;
    private File _file;
    private ByLineType _byLineRange;

    /* Records the filepath. The filepath "-" denotes standard input. Nothing is
     * opened here; see open().
     */
    private this(string filepath) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = filepath == "-";
        _isOpen = false;
        _hasBeenOpened = false;
    }

    /** byLine returns the bufferedByLine object held by the ByLineSource instance.
     *
     * The File underlying the bufferedByLine object is open for reading as long as
     * the ByLineSource instance is the front element of the ByLineSourceRange it
     * came from.
     */
    ByLineType byLine() nothrow @safe
    {
        return _byLineRange;
    }

    /** name returns a user friendly name representing the underlying input source.
     *
     * For files, it is the filepath provided to ByLineSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * rather than comparing against name().)
     */
    string name() const pure nothrow @safe
    {
        return _isStdin ? "Standard Input" : _filepath;
    }

    /** isStdin returns true if the underlying input source is Standard Input, false
     * otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the ByLineSource instance is open for reading, false
     * otherwise.
     *
     * "Open" in this context is whether the ByLineSource object is currently "open".
     * The underlying input source backing it does not necessarily have the same
     * state. The ByLineSource instance is "open" if it is the front element of the
     * ByLineSourceRange that created it.
     *
     * The underlying input source follows the same open/close state where that makes
     * sense. In particular, real files are closed when the ByLineSource object is
     * closed. The exception is standard input, which is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the source (at most once per instance) and creates the bufferedByLine
     * range over it.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        _file = isStdin ? stdin : _filepath.File("rb");
        _byLineRange = newByLineFn(_file);
        _isOpen = true;
        _hasBeenOpened = true;
    }

    /* Marks the source closed. The underlying File is closed too, unless it is
     * standard input.
     */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
2111 
// ByLineSourceRange and ByLineSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_byline_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    string file0 = buildPath(testDir, "file0.txt");
    string file1 = buildPath(testDir, "file1.txt");
    string file2 = buildPath(testDir, "file2.txt");
    string file3 = buildPath(testDir, "file3.txt");

    /* File 0 is completely empty, file 1 has only a header line, files 2 and 3
     * have a header plus one and two body lines.
     */
    string file0Header = "";
    string file1Header = "file 1 header\n";
    string file2Header = "file 2 header\n";
    string file3Header = "file 3 header\n";

    string file0Body = "";
    string file1Body = "";
    string file2Body = "file 2 line 1\n";
    string file3Body = "file 3 line 1\nfile 3 line 2\n";

    string file0Data = file0Header ~ file0Body;
    string file1Data = file1Header ~ file1Body;
    string file2Data = file2Header ~ file2Body;
    string file3Data = file3Header ~ file3Body;

    {
        file0.File("wb").write(file0Data);
        file1.File("wb").write(file1Data);
        file2.File("wb").write(file2Data);
        file3.File("wb").write(file3Data);
    }

    /* Only the headers and the complete file contents are compared below; lines
     * are read through byLine, so no raw read buffer is needed.
     */
    auto inputFiles = [file0, file1, file2, file3];
    auto fileHeaders = [file0Header, file1Header, file2Header, file3Header];
    auto fileData = [file0Data, file1Data, file2Data, file3Data];

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    auto readSourcesNoTerminator = appender!(ByLineSource!(No.keepTerminator)[]);
    auto readSourcesYesTerminator = appender!(ByLineSource!(Yes.keepTerminator)[]);

    foreach(numFiles; 1 .. inputFiles.length + 1)
    {
        /* Using No.keepTerminator. */
        readSourcesNoTerminator.clear;
        auto inputSourcesNoTerminator = byLineSourceRange!(No.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesNoTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesNoTerminator.enumerate)
        {
            readSourcesNoTerminator.put(source);

            /* Only the current source is open; all earlier ones were closed. */
            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesNoTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesNoTerminator.data[fileNum].isOpen);

            auto headerNoTerminatorLength = fileHeaders[fileNum].length;
            if (headerNoTerminatorLength > 0) --headerNoTerminatorLength;

            /* The first line, when there is one, is the header without its newline. */
            assert(source.byLine.empty ||
                   source.byLine.front == fileHeaders[fileNum][0 .. headerNoTerminatorLength]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Terminators were stripped, so add them back to rebuild the file. */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
                readFileData.put('\n');
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesNoTerminator.empty);

        /* Using Yes.keepTerminator. */
        readSourcesYesTerminator.clear;
        auto inputSourcesYesTerminator = byLineSourceRange!(Yes.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesYesTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesYesTerminator.enumerate)
        {
            readSourcesYesTerminator.put(source);

            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesYesTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesYesTerminator.data[fileNum].isOpen);

            assert(source.byLine.empty || source.byLine.front == fileHeaders[fileNum]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Lines keep their terminators, so concatenation rebuilds the file. */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesYesTerminator.empty);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = byLineSourceRange!(No.keepTerminator)(nofiles);
        assert(sources.empty);
    }
    {
        auto sources = byLineSourceRange!(Yes.keepTerminator)(nofiles);
        assert(sources.empty);
    }

    /* Error cases: a missing file must throw, whether it is first or later. */
    assertThrown(byLineSourceRange!(No.keepTerminator)([file0, "no_such_file.txt"]).each);
    assertThrown(byLineSourceRange!(Yes.keepTerminator)(["no_such_file.txt", file1]).each);
}
2251 
/** Tests whether a type is a 'bufferable' input source, meaning a source that
 * inputSourceByChunk accepts.
 *
 * A type qualifies if, ignoring top-level qualifiers, it is a file handle (such as
 * std.stdio.File), or if it is an input range whose element encoding type is ubyte
 * (qualifiers on the element type are ignored). Mutable dynamic ubyte arrays are
 * the typical in-memory case.
 *
 * Static arrays and const or immutable arrays are not input ranges themselves, but
 * slicing them produces one.
 *
 * Note: The element types could be generalized further if that were useful. At
 * present the main purpose of inputSourceByChunk is to represent a buffered file
 * while also accepting ubyte arrays as sources for unit testing.
 */
template isBufferableInputSource(R)
{
    static if (isFileHandle!(Unqual!R))
    {
        enum bool isBufferableInputSource = true;
    }
    else static if (isInputRange!R)
    {
        enum bool isBufferableInputSource = is(Unqual!(ElementEncodingType!R) == ubyte);
    }
    else
    {
        enum bool isBufferableInputSource = false;
    }
}
2267 
@safe unittest
{
    import std.stdio : stdin;

    /* File handles and ubyte slices qualify; character arrays do not. */
    static assert(isBufferableInputSource!(File));
    static assert(isBufferableInputSource!(typeof(stdin)));
    static assert(isBufferableInputSource!(ubyte[]));
    static assert(!isBufferableInputSource!(char[]));
    static assert(!isBufferableInputSource!(string));

    /* Fixed-length arrays in every qualifier combination. */
    ubyte[10] fixedMutable;
    const ubyte[1] fixedConst;
    immutable ubyte[1] fixedImmutable;
    const(ubyte)[1] fixedConstElts;
    immutable(ubyte)[1] fixedImmutableElts;

    /* Dynamic arrays in every qualifier combination. */
    ubyte[] dynMutable = new ubyte[](10);
    const(ubyte)[] dynConstElts = new ubyte[](10);
    immutable(ubyte)[] dynImmutableElts = new ubyte[](10);
    const ubyte[] dynConst = new ubyte[](10);
    immutable ubyte[] dynImmutable = new ubyte[](10);

    /* Static arrays are never bufferable as-is. */
    static assert(!isBufferableInputSource!(typeof(fixedMutable)));
    static assert(!isBufferableInputSource!(typeof(fixedConstElts)));
    static assert(!isBufferableInputSource!(typeof(fixedImmutableElts)));
    static assert(!isBufferableInputSource!(typeof(fixedConst)));
    static assert(!isBufferableInputSource!(typeof(fixedImmutable)));

    /* Dynamic arrays are bufferable when the array itself is mutable, whatever the
     * qualifiers on the elements. Fully const or immutable arrays are not.
     */
    static assert(isBufferableInputSource!(typeof(dynMutable)));
    static assert(isBufferableInputSource!(typeof(dynConstElts)));
    static assert(isBufferableInputSource!(typeof(dynImmutableElts)));
    static assert(!isBufferableInputSource!(typeof(dynConst)));
    static assert(!isBufferableInputSource!(typeof(dynImmutable)));

    /* Slicing turns all forms into bufferable arrays. */
    static assert(isBufferableInputSource!(typeof(fixedMutable[])));
    static assert(isBufferableInputSource!(typeof(fixedConstElts[])));
    static assert(isBufferableInputSource!(typeof(fixedImmutableElts[])));
    static assert(isBufferableInputSource!(typeof(fixedConst[])));
    static assert(isBufferableInputSource!(typeof(fixedImmutable[])));

    static assert(isBufferableInputSource!(typeof(dynConst[])));
    static assert(isBufferableInputSource!(typeof(dynImmutable[])));
    static assert(isBufferableInputSource!(typeof(dynMutable[])));
    static assert(isBufferableInputSource!(typeof(dynConstElts[])));
    static assert(isBufferableInputSource!(typeof(dynImmutableElts[])));

    /* Element type tests: every form has ubyte elements once qualifiers are removed. */
    static assert(is(Unqual!(ElementType!(typeof(fixedMutable))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(fixedConstElts))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(fixedImmutableElts))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(fixedConst))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(fixedImmutable))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(dynMutable))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(dynConstElts))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(dynImmutableElts))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(dynConst))) == ubyte));
    static assert(is(Unqual!(ElementType!(typeof(dynImmutable))) == ubyte));

    /* A minimal user-defined input range of ubyte. */
    struct MinimalByteRange
    {
        void popFront();
        @property bool empty();
        @property ubyte front();
    }

    /* A user-defined ubyte range that additionally has save, length, and slicing. */
    struct SliceableByteRange
    {
        @property ubyte front();
        void popFront();
        @property bool empty();
        @property auto save() { return this; }
        @property size_t length();
        SliceableByteRange opSlice(size_t, size_t);
    }

    static assert(isInputRange!MinimalByteRange);
    static assert(isBufferableInputSource!MinimalByteRange);

    static assert(isInputRange!SliceableByteRange);
    static assert(is(ElementEncodingType!SliceableByteRange == ubyte));
    static assert(hasSlicing!SliceableByteRange);
    static assert(isBufferableInputSource!SliceableByteRange);

    /* For code coverage. */
    SliceableByteRange sliceable;
    auto savedCopy = sliceable.save;

    /* Library ranges: the element type decides, finite or infinite. */
    auto fiveInts = 7.repeat!int(5);
    auto fiveUbytes = 7.repeat!ubyte(5);
    auto endlessUbytes = 7.repeat!ubyte;

    static assert(!isBufferableInputSource!(typeof(fiveInts)));
    static assert(isBufferableInputSource!(typeof(fiveUbytes)));
    static assert(isBufferableInputSource!(typeof(endlessUbytes)));
}
2365 
/** inputSourceByChunk returns a range that reads either a file handle (File) or a
 * ubyte input range a chunk at a time.
 *
 * This is a cover for File.byChunk that allows passing an in-memory array or input
 * range as well. At present the motivation is primarily to enable unit testing of
 * chunk-based algorithms using in-memory data.
 *
 * inputSourceByChunk takes either a File open for reading or an input range with
 * ubyte elements. Data is read a buffer at a time. The buffer can be user provided,
 * or allocated by inputSourceByChunk based on a caller provided buffer size.
 *
 * Dynamic, mutable arrays are fine as sources. Use slicing to turn a static, const,
 * or immutable array into an input range.
 *
 * The chunks are returned as an input range.
 *
 * This overload allocates a new buffer of `size` bytes and forwards to the overload
 * taking a caller provided buffer. It carries the same isBufferableInputSource
 * constraint as that overload, so an unsupported source type is rejected at the
 * call site rather than inside the function body.
 */
auto inputSourceByChunk(InputSource)(InputSource source, size_t size)
if (isBufferableInputSource!InputSource)
{
    return inputSourceByChunk(source, new ubyte[](size));
}
2388 
/// Ditto
auto inputSourceByChunk(InputSource)(InputSource source, ubyte[] buffer)
if (isBufferableInputSource!InputSource)
{
    static if (!isFileHandle!(Unqual!InputSource))
    {
        /* Non-file sources. The source is split into buffer-sized pieces and each
         * piece is copied into the caller's buffer, which is what front returns.
         */
        static struct BufferedChunk
        {
            private Chunks!InputSource _chunks;
            private ubyte[] _buffer;

            this(InputSource source, ubyte[] buffer)
            {
                import std.exception : enforce;
                enforce(buffer.length > 0, "buffer size must be larger than 0");

                _buffer = buffer;
                _chunks = source.chunks(_buffer.length);

                /* Prime the range so front is valid right after construction. */
                fillBuffer();
            }

            /* Loads the next piece of the source into _buffer. A zero-length
             * _buffer marks the range as exhausted.
             */
            private void fillBuffer()
            {
                if (_chunks.empty)
                {
                    _buffer.length = 0;
                    return;
                }

                import std.algorithm : copy;

                /* copy returns the part of _buffer that was not written to. */
                auto piece = _chunks.front.take(_buffer.length);
                auto unfilled = piece.copy(_buffer);
                _chunks.popFront;

                /* Only the last chunk should be shorter than the buffer. */
                assert(unfilled.length == 0 || _chunks.empty);

                /* Trim _buffer to the bytes actually read. */
                _buffer.length -= unfilled.length;
            }

            @property bool empty()
            {
                return _buffer.length == 0;
            }

            @property ubyte[] front()
            {
                assert(!empty, "Attempting to fetch the front of an empty inputSourceByChunks");
                return _buffer;
            }

            void popFront()
            {
                assert(!empty, "Attempting to popFront an empty inputSourceByChunks");
                fillBuffer();
            }
        }

        return BufferedChunk(source, buffer);
    }
    else
    {
        /* File handles: delegate directly to File.byChunk. */
        return source.byChunk(buffer);
    }
}
2453 
unittest  // inputSourceByChunk
{
    /* Round-trip test: for every prefix of a multi-byte test string and a range of
     * buffer sizes, the joined chunks must equal the original bytes. Each case is
     * checked for both an in-memory array and a file, with the buffer either
     * allocated by inputSourceByChunk or supplied by the caller.
     */
    import tsv_utils.common.unittest_utils;   // tsv-utils unit test helpers
    import std.file : rmdirRecurse;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_inputSourceByChunk");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : equal, joiner;
    import std.format;
    import std.string : representation;

    auto charData = "abcde,ßÀß,あめりか物語,012345";
    ubyte[] ubyteData = charData.dup.representation;

    ubyte[1024] rawBuffer;  // Must be larger than largest bufferSize in tests.

    void writeFileData(string filePath, ubyte[] data)
    {
        import std.stdio;

        auto f = filePath.File("wb");
        f.rawWrite(data);
        f.close;
    }

    /* The upper bound is length + 1 so the complete data set is tested as well as
     * every proper prefix, including the empty one.
     */
    foreach (size_t dataSize; 0 .. ubyteData.length + 1)
    {
        auto data = ubyteData[0 .. dataSize];
        auto filePath = buildPath(testDir, format("data_%d.txt", dataSize));
        writeFileData(filePath, data);

        /* Buffer sizes run from 1 through dataSize + 1, covering buffers smaller
         * than, equal to, and larger than the data.
         */
        foreach (size_t bufferSize; 1 .. dataSize + 2)
        {
            /* Test-A: in-memory array, buffer allocated from a size. */
            assert(data.inputSourceByChunk(bufferSize).joiner.equal(data),
                   format("[Test-A] dataSize: %d, bufferSize: %d", dataSize, bufferSize));

            assert (rawBuffer.length >= bufferSize);

            /* Test-B: in-memory array, caller provided buffer. */
            ubyte[] buffer = rawBuffer[0 .. bufferSize];
            assert(data.inputSourceByChunk(buffer).joiner.equal(data),
                   format("[Test-B] dataSize: %d, bufferSize: %d", dataSize, bufferSize));

            /* Test-C: file, buffer allocated from a size. */
            {
                auto inputStream = filePath.File;
                assert(inputStream.inputSourceByChunk(bufferSize).joiner.equal(data),
                       format("[Test-C] dataSize: %d, bufferSize: %d", dataSize, bufferSize));
                inputStream.close;
            }

            /* Test-D: file, caller provided buffer. */
            {
                auto inputStream = filePath.File;
                assert(inputStream.inputSourceByChunk(buffer).joiner.equal(data),
                       format("[Test-D] dataSize: %d, bufferSize: %d", dataSize, bufferSize));
                inputStream.close;
            }
        }
    }
}
2514 
@safe unittest // inputSourceByChunk array cases
{
    import std.algorithm : equal;

    /* Every array below holds the same five values, so reading any of them two
     * elements at a time must produce these three chunks.
     */
    auto expected = [[5, 6], [7, 8], [9]];

    /* Dynamic arrays with a mutable slice can be passed as-is. */
    ubyte[] dynamicArray = [5, 6, 7, 8, 9];
    const(ubyte)[] dynamicArrayConstElts = [5, 6, 7, 8, 9];
    immutable(ubyte)[] dynamicArrayImmutableElts = [5, 6, 7, 8, 9];

    assert(dynamicArray.inputSourceByChunk(2).equal(expected));
    assert(dynamicArrayConstElts.inputSourceByChunk(2).equal(expected));
    assert(dynamicArrayImmutableElts.inputSourceByChunk(2).equal(expected));

    /* Slicing makes every array form usable: static arrays first... */
    ubyte[5] staticArray = [5, 6, 7, 8, 9];
    const(ubyte)[5] staticArrayConstElts = [5, 6, 7, 8, 9];
    immutable(ubyte)[5] staticArrayImmutableElts = [5, 6, 7, 8, 9];
    const ubyte[5] staticConstArray = [5, 6, 7, 8, 9];
    immutable ubyte[5] staticImmutableArray = [5, 6, 7, 8, 9];

    assert(staticArray[].inputSourceByChunk(2).equal(expected));
    assert(staticArrayConstElts[].inputSourceByChunk(2).equal(expected));
    assert(staticArrayImmutableElts[].inputSourceByChunk(2).equal(expected));
    assert(staticConstArray[].inputSourceByChunk(2).equal(expected));
    assert(staticImmutableArray[].inputSourceByChunk(2).equal(expected));

    /* ...then dynamic arrays, including the fully const and immutable ones. */
    const ubyte[] dynamicConstArray = [5, 6, 7, 8, 9];
    immutable ubyte[] dynamicImmutableArray = [5, 6, 7, 8, 9];

    assert(dynamicArray[].inputSourceByChunk(2).equal(expected));
    assert(dynamicArrayConstElts[].inputSourceByChunk(2).equal(expected));
    assert(dynamicArrayImmutableElts[].inputSourceByChunk(2).equal(expected));
    assert(dynamicConstArray[].inputSourceByChunk(2).equal(expected));
    assert(dynamicImmutableArray[].inputSourceByChunk(2).equal(expected));
}
2548 
@safe unittest // inputSourceByChunk input ranges
{
    import std.algorithm : equal;

    /* Finite, non-array source: five ubyte sevens. The range is passed by value,
     * so the same variable can be chunked repeatedly.
     */
    auto fiveSevens = 7.repeat!ubyte(5);

    /* Buffer sizes below, equal to, and above the source length. */
    assert(fiveSevens.inputSourceByChunk(1).equal([[7], [7], [7], [7], [7]]));
    assert(fiveSevens.inputSourceByChunk(2).equal([[7, 7], [7, 7], [7]]));
    assert(fiveSevens.inputSourceByChunk(3).equal([[7, 7, 7], [7, 7]]));
    assert(fiveSevens.inputSourceByChunk(4).equal([[7, 7, 7, 7], [7]]));
    assert(fiveSevens.inputSourceByChunk(5).equal([[7, 7, 7, 7, 7]]));
    assert(fiveSevens.inputSourceByChunk(6).equal([[7, 7, 7, 7, 7]]));

    /* Infinite source: only the first three chunks are taken. */
    auto endlessSevens = 7.repeat!ubyte;
    assert(endlessSevens.inputSourceByChunk(2).take(3).equal([[7, 7], [7, 7], [7, 7]]));
}