1 /**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
      an input line. Fields in the subset are accessed by array indices. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [isFlushableOutputRange] - Tests if something is an OutputRange with a flush
16       member.
17 
18     * [bufferedByLine] - An input range that reads from a File handle line by line.
19       It is similar to the standard library method std.stdio.File.byLine, but quite a
20       bit faster. This is achieved by reading in larger blocks and buffering.
21 
    * [InputSourceRange] - An input range that provides open file access to a set of
      files. It is used to iterate over files passed as command line arguments. This
      enables reading the header line of a file during command line argument
      processing, then passing the open file to the main processing functions.
26 
    * [ByLineSourceRange] - Similar to an InputSourceRange, except that it provides
      access to a byLine iterator (bufferedByLine) rather than an open file. This is
      used by tools that run the same processing logic on both header and non-header
      lines.
30 
31     * [isBufferableInputSource] - Tests if a file or input range can be read in a
32       buffered fashion by inputSourceByChunk.
33 
34     * [inputSourceByChunk] - Returns a range that reads from a file handle (File) or
35       a ubyte input range a chunk at a time.
36 
37     * [joinAppend] - A function that performs a join, but appending the join output to
38       an output stream. It is a performance improvement over using join or joiner with
39       writeln.
40 
41     * [getTsvFieldValue] - A convenience function when only a single value is needed
42       from an input line.
43 
44     * [throwIfWindowsNewline] - A utility for detecting Windows newlines in input.
45 )
46 
47 Copyright (c) 2015-2021, eBay Inc.
48 Initially written by Jon Degenhardt
49 
License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
51 */
52 
53 module tsv_utils.common.utils;
54 
55 import std.range;
56 import std.stdio : File, isFileHandle, KeepTerminator;
57 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned, ReturnType, Unqual;
58 import std.typecons : Flag, No, Yes;
59 
// InputFieldReordering class.
61 
/** Flag used by the InputFieldReordering template. */
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;      // Reordered fields; slices of caller-owned data.
    private TupleFromTo[] fromToMap;    // Full mapping, sorted by input field index.
    private TupleFromTo[] mapStack;     // Portion of fromToMap not yet filled for this line.

    /** Constructs the reordering for a list of input field indices.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in the desired
     *       output order. An index may be listed more than once.
     *   start = Offset added to every output position. Entry `i` of
     *       `inputFieldIndicies` is written to `outputFields[start + i]`. The output
     *       buffer is sized to hold the shifted positions; the first `start` entries
     *       are never written and stay empty.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output positions run from start to (start + length - 1), so the buffer must
         * extend past the index count when start is non-zero. Sizing it to the index
         * count alone makes processNextField index out of bounds.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        // Sorting by input index lets a line be processed in a single forward pass.
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            // Reset every entry so fields missing from a short line read as empty.
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the
     * outputFields array.
     *
     * processNextField should be called once for each field on the line, in the order
     * found. The processing of the line can terminate once allFieldsFilled returns
     * true.
     *
     * The return value is the number of output fields the input field maps to. Zero
     * means the field is not mapped to the output fields array.
     *
     * If, prior to allFieldsFilled returning true, any fields on the input line
     * are not passed to processNextField, the caller should either ensure the fields
     * are not part of the output fields or have partial lines enabled.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;

        // A field listed several times has consecutive entries at the front of mapStack.
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
207 
// InputFieldReordering - Tests using different character types.
@safe unittest
{
    import std.conv : to;

    string[][] rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] wantedFields = [2, 0];

    string[][] reordered = [["r1f2",   "r1f0"],
                            ["ÀBCßßZ", "r2f0"],
                            ["456",    "r3f0"]];

    // Conversion to immutable dchar strings; not compared against any reordering.
    dstring[][] dstringReordered = to!(dstring[][])(reordered);

    /* Runs the [2, 0] reordering over every row using character type C. After each
     * field it checks that allFieldsFilled becomes true exactly when field 2 has been
     * seen; after each row it checks the assembled output.
     */
    static void verifyFor(C)(string[][] rows, ref size_t[] wantedFields, string[][] reordered)
    {
        C[][][] want = to!(C[][][])(reordered);
        auto ifr = new InputFieldReordering!C(wantedFields);

        foreach (rowNum, row; rows)
        {
            ifr.initNewLine;

            foreach (fieldNum, text; row)
            {
                ifr.processNextField(fieldNum, to!(C[])(text));
                assert ((fieldNum >= 2) == ifr.allFieldsFilled);
            }

            assert(ifr.allFieldsFilled);
            assert(ifr.outputFields == want[rowNum]);
        }
    }

    verifyFor!char(rows, wantedFields, reordered);
    verifyFor!wchar(rows, wantedFields, reordered);
    verifyFor!dchar(rows, wantedFields, reordered);
}
257 
// InputFieldReordering - Test of partial line support.
@safe unittest
{
    import std.conv : to;

    string[][] rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] wantedFields = [2, 0];

    /* Snapshots of the output buffer, indexed by [row][field]: the content expected
     * immediately after that field of that row has been processed.
     */
    string[][][][] snapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ];

    char[][][][] want = to!(char[][][][])(snapshots);

    auto ifr = new InputFieldReordering!(char, EnablePartialLines.yes)(wantedFields);

    foreach (rowNum, row; rows)
    {
        ifr.initNewLine;

        foreach (fieldNum, text; row)
        {
            char[] field = to!(char[])(text);
            ifr.processNextField(fieldNum, field);

            // With partial lines enabled the buffer is readable after every field.
            assert(ifr.outputFields == want[rowNum][fieldNum]);
        }
    }
}
291 
// InputFieldReordering - Field combination tests.
@safe unittest
{
    import std.conv : to;

    /* One field selection together with the output expected for each input line. */
    static struct Scenario
    {
        size_t[] fields;
        string[][] expected;
    }

    string[][] inputLines = [["00", "01", "02", "03"],
                             ["10", "11", "12", "13"],
                             ["20", "21", "22", "23"]];

    Scenario[] scenarios = [
        Scenario([0],
                 [["00"],
                  ["10"],
                  ["20"]]),

        Scenario([3],
                 [["03"],
                  ["13"],
                  ["23"]]),

        Scenario([0, 1],
                 [["00", "01"],
                  ["10", "11"],
                  ["20", "21"]]),

        Scenario([1, 0],
                 [["01", "00"],
                  ["11", "10"],
                  ["21", "20"]]),

        Scenario([0, 3],
                 [["00", "03"],
                  ["10", "13"],
                  ["20", "23"]]),

        Scenario([3, 0],
                 [["03", "00"],
                  ["13", "10"],
                  ["23", "20"]]),

        Scenario([0, 1, 2, 3],
                 [["00", "01", "02", "03"],
                  ["10", "11", "12", "13"],
                  ["20", "21", "22", "23"]]),

        Scenario([3, 2, 1, 0],
                 [["03", "02", "01", "00"],
                  ["13", "12", "11", "10"],
                  ["23", "22", "21", "20"]]),

        Scenario([0, 3, 0, 0, 1],
                 [["00", "03", "00", "00", "01"],
                  ["10", "13", "10", "10", "11"],
                  ["20", "23", "20", "20", "21"]]),
    ];

    foreach (ref scenario; scenarios)
    {
        auto ifr = new InputFieldReordering!char(scenario.fields);
        char[][][] want = to!(char[][][])(scenario.expected);

        foreach (lineIndex, line; inputLines)
        {
            ifr.initNewLine;

            // Every field of the line is fed in, even after the output is complete.
            foreach (fieldIndex, fieldValue; line)
            {
                ifr.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            assert(ifr.outputFields == want[lineIndex]);
        }
    }
}
394 
/** Flag accepted by input buffering ranges to indicate if data should be read using
line buffering. Input is read as soon as lines are available when line buffered mode
is used.

The flag is also accepted by the alternate BufferedOutputRange constructor, where
`Yes.lineBuffered` turns on line-buffered output.
 */
alias LineBuffered = Flag!"lineBuffered";
400 
/** Flag accepted by input buffering ranges to indicate if the header line should be
read when opening a file. The values are `Yes.readHeader` and `No.readHeader`.
*/
alias ReadHeader = Flag!"readHeader";
405 
/**
Default parameter values for BufferedOutputRange. Pass individual members to the
BufferedOutputRange constructor when overriding some settings while keeping the
defaults for the others.
 */
enum BufferedOutputRangeDefaults
{
    flushSize = 10 * 1024,              // 10 KiB
    lineBufferedFlushSize = 1,          // Flush size used for line-buffered mode.
    reserveSize = 11 * 1024,            // 11 KiB
    maxSize = 4 * 1024 * 1024           // 4 MiB
}
418 
/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.

This scheme only flushes the internal buffer, it does not flush the output stream.
Use flush() to flush both the internal buffer and the output stream. Specify flushSize
as BufferedOutputRangeDefaults.lineBufferedFlushSize in the constructor to get line
buffering with immediate flushes to the output stream.

The output stream type must be provided as a template argument during construction. E.g.
```
    auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout);
```

BufferedOutputRange has a put method allowing it to be used as an output range. It has
a number of other methods providing additional control.

Methods:

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `this(outputStream, LineBuffered)` - Alternate constructor for turning line-buffered
      mode on.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flush()` - Writes the internal buffer to the output stream and flushes the output
      stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".

    * `flushBuffer()` - Private. Writes the internal buffer to the output stream. The
      output stream itself is flushed only in line-buffered mode.
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.array : appender;
    import std.format : format;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Constructor. Takes the output stream, e.g. stdout. Optional arguments control
     *  buffering behavior, defaults normally suffice. The defaults are available from
     *  the `BufferedOutputRangeDefaults` enum.
     *
     *  Params:
     *    outputTarget = The File or output range written to.
     *    flushSize = Buffer size at which newline-terminated writes trigger a write
     *        to the output target.
     *    reserveSize = Initial capacity reserved for the internal buffer.
     *    maxSize = Buffer size at which a write is triggered regardless of newlines.
     *        Expected to be at least `flushSize`.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = BufferedOutputRangeDefaults.flushSize,
         size_t reserveSize = BufferedOutputRangeDefaults.reserveSize,
         size_t maxSize = BufferedOutputRangeDefaults.maxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;

        // If the assert above is compiled out, raise maxSize to flushSize.
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Alternate constructor used to turn line-buffered mode on. Use Yes.lineBuffered
     *  to enable. Lines are flushed at newline boundaries when in line-buffered mode.
     */
    this(OutputTarget outputTarget, LineBuffered lineBuffered)
    {
        immutable size_t flushSize = lineBuffered ?
            BufferedOutputRangeDefaults.lineBufferedFlushSize :
            BufferedOutputRangeDefaults.flushSize;

        this(outputTarget, flushSize);
    }

    /* Writes out any buffered data when the object goes out of scope. */
    ~this()
    {
        flush();
    }

    /* flushBuffer writes the internal buffer to the output target and clears the
     * buffer. For File targets in line-buffered mode the File is flushed as well.
     */
    private void flushBuffer()
    {
        static if (isFileHandle!OutputTarget)
        {
            _outputTarget.rawWrite(_outputBuffer.data);

            // Line-buffered mode is identified by its distinguished flush size.
            if (_flushSize == BufferedOutputRangeDefaults.lineBufferedFlushSize)
            {
                _outputTarget.flush();
            }
        }
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /**  Writes the internal buffer to the output stream and flushes the output stream.
     */
    void flush()
    {
        flushBuffer();
        static if (isFileHandle!OutputTarget) _outputTarget.flush();
    }

    /* flushIfFull flushes the internal buffer if flushSize has been reached. Returns
     * true if the buffer was flushed.
     */
    private bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flushBuffer();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    private void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flushBuffer();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size. Only the internal
     * buffer is written out; as with flushIfFull and flushIfMaxSize, the output stream
     * is not flushed (except in line-buffered mode, which flushBuffer handles).
     *
     * Returns true if the buffer was flushed.
     */
    private bool maybeFlush()
    {
        immutable size_t bufferLength = _outputBuffer.data.length;

        /* An empty buffer has nothing to write and no last character to inspect. The
         * explicit check matters when flushSize is zero, as the size test alone then
         * passes for an empty buffer.
         */
        immutable bool doFlush =
            bufferLength > 0 &&
            bufferLength >= _flushSize &&
            (_outputBuffer.data[$-1] == '\n' || bufferLength >= _maxSize);

        if (doFlush) flushBuffer();
        return doFlush;
    }

    /** Appends data to the output buffer without checking for flush conditions. This
     * is intended for cases where an `appendln` or `append` ending in newline will
     * shortly follow.
     */
    private void appendRaw(T)(T stuff) pure
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends data to the output buffer. The output buffer is flushed if the appended
     *  data ends in a newline and the output buffer has reached `flushSize`, or if
     *  the output buffer has reached `maxSize`.
     */
    void append(T...)(T stuff)
    {
        foreach (x; stuff) appendRaw(x);
        maybeFlush();
    }

    /** Appends data plus a newline to the output buffer. The output buffer is flushed
     *  if it has reached `flushSize`. Returns true if the buffer was flushed.
     */
    bool appendln(T...)(T stuff)
    {
        foreach (x; stuff) appendRaw(x);
        appendRaw('\n');
        return flushIfFull();
    }

    /** joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        // The first element is written without a leading delimiter.
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /** The `put` method makes BufferedOutputRange an OutputRange. It operates similarly
     *  to `append`. A single newline, '\n' or "\n", is handled as `appendln()`.
     */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
657 
658 // BufferedOutputRange.
659 unittest
660 {
661     import tsv_utils.common.unittest_utils;
662     import std.file : rmdirRecurse, readText;
663     import std.path : buildPath;
664 
665     auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
666     scope(exit) testDir.rmdirRecurse;
667 
668     import std.algorithm : map, joiner;
669     import std.range : iota;
670     import std.conv : to;
671 
672     /* Basic test. Note that exiting the scope triggers flush. */
673     string filepath1 = buildPath(testDir, "file1.txt");
674     {
675         import std.stdio : File;
676 
677         auto ostream = BufferedOutputRange!File(filepath1.File("wb"));
678         ostream.append("file1: ");
679         ostream.append("abc");
680         ostream.append(["def", "ghi", "jkl"]);
681         ostream.appendln(100.to!string);
682         ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
683         ostream.appendln();
684         ostream.appendln('A');
685         ostream.appendln("B", "CD");
686         ostream.appendln('E', "FG", 'H');
687         ostream.appendln('I', "JK", 'L', "M");
688         ostream.append('N', "O");
689         ostream.append('P', "QR", "STU\n");
690     }
691     assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\nA\nBCD\nEFGH\nIJKLM\nNOPQRSTU\n");
692 
693     /* Test with no reserve and flush at every line. */
694     string filepath2 = buildPath(testDir, "file2.txt");
695     {
696         import std.stdio : File;
697 
698         auto ostream = BufferedOutputRange!File(filepath2.File("wb"), 0, 0);
699         ostream.append("file2: ");
700         ostream.append("abc");
701         ostream.append(["def", "ghi", "jkl"]);
702         ostream.appendln("100");
703         ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
704         ostream.appendln();
705         ostream.appendln("X");
706     }
707     assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\nX\n");
708 
709     /* Test default line-buffered mode (flush at every line). */
710     string filepath2a = buildPath(testDir, "file2a.txt");
711     {
712         import std.stdio : File;
713 
714         auto ostream = BufferedOutputRange!File(
715             filepath2a.File("wb"), BufferedOutputRangeDefaults.lineBufferedFlushSize);
716         ostream.append("file2a: ");
717         ostream.append("abc");
718         ostream.append(["def", "ghi", "jkl"]);
719         ostream.appendln("100");
720         ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
721         ostream.appendln();
722         ostream.appendln("X");
723     }
724     assert(filepath2a.readText == "file2a: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\nX\n");
725 
726     /* Test the alternate constructor. */
727     static foreach (isLineBuffered; [Yes.lineBuffered, No.lineBuffered])
728     {{
729         string filepath2b = buildPath(testDir, "file2b.txt");
730         {
731             import std.stdio : File;
732 
733             auto ostream = BufferedOutputRange!File(filepath2b.File("wb"), isLineBuffered);
734             ostream.append("file2b: ");
735             ostream.append("abc");
736             ostream.append(["def", "ghi", "jkl"]);
737             ostream.appendln("100");
738             ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
739             ostream.appendln();
740             ostream.appendln("X");
741         }
742         assert(filepath2b.readText == "file2b: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\nX\n");
743     }}
744 
745     /* With a locking text writer. Requires version 2.078.0
746        See: https://issues.dlang.org/show_bug.cgi?id=9661
747      */
748     static if (__VERSION__ >= 2078)
749     {
750         string filepath3 = buildPath(testDir, "file3.txt");
751         {
752             import std.stdio : File;
753 
754             auto ltw = filepath3.File("wb").lockingTextWriter;
755             {
756                 auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
757                 ostream.append("file3: ");
758                 ostream.append("abc");
759                 ostream.append(["def", "ghi", "jkl"]);
760                 ostream.appendln("100");
761                 ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
762                 ostream.appendln();
763             }
764         }
765         assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
766     }
767 
768     /* With an Appender. */
769     import std.array : appender;
770     auto app1 = appender!(char[]);
771     {
772         auto ostream = BufferedOutputRange!(typeof(app1))(app1);
773         ostream.append("appender1: ");
774         ostream.append("abc");
775         ostream.append(["def", "ghi", "jkl"]);
776         ostream.appendln("100");
777         ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
778         ostream.appendln();
779     }
780     assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
781 
782     /* With an Appender, but checking flush boundaries. */
783     auto app2 = appender!(char[]);
784     {
785         auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
786         bool wasFlushed = false;
787 
788         assert(app2.data == "");
789 
790         ostream.append("12345678"); // Not flushed yet.
791         assert(app2.data == "");
792 
793         wasFlushed = ostream.appendln;  // Nineth char, not flushed yet.
794         assert(!wasFlushed);
795         assert(app2.data == "");
796 
797         wasFlushed = ostream.appendln;  // Tenth char, now flushed.
798         assert(wasFlushed);
799         assert(app2.data == "12345678\n\n");
800 
801         app2.clear;
802         assert(app2.data == "");
803 
804         ostream.append("12345678");
805 
806         wasFlushed = ostream.flushIfFull;
807         assert(!wasFlushed);
808         assert(app2.data == "");
809 
810         ostream.flush;
811         assert(app2.data == "12345678");
812 
813         app2.clear;
814         assert(app2.data == "");
815 
816         ostream.append("123456789012345");
817         assert(app2.data == "");
818     }
819     assert(app2.data == "123456789012345");
820 
821     /* Using joinAppend. */
822     auto app1b = appender!(char[]);
823     {
824         auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
825         ostream.append("appenderB: ");
826         ostream.joinAppend(["a", "bc", "def"], '-');
827         ostream.append(':');
828         ostream.joinAppend(["g", "hi", "jkl"], '-');
829         ostream.appendln("*100*");
830         ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
831         ostream.append(' ');
832         ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
833         ostream.appendln();
834     }
835     assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
836            "app1b.data: |" ~app1b.data ~ "|");
837 
    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */
842 
843     void outputStuffAsRef(T)(ref T range)
844     if (isOutputRange!(T, char))
845     {
846         range.put('1');
847         put(range, "23");
848         range.put('\n');
849         range.put(["5", "67"]);
850         put(range, iota(8, 10).map!(x => x.to!string));
851         put(range, "\n");
852     }
853 
854     void outputStuffAsVal(T)(T range)
855     if (isOutputRange!(T, char))
856     {
857         put(range, '1');
858         range.put("23");
859         put(range, '\n');
860         put(range, ["5", "67"]);
861         range.put(iota(8, 10).map!(x => x.to!string));
862         range.put("\n");
863     }
864 
865     auto app3 = appender!(char[]);
866     {
867         auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
868         outputStuffAsRef(ostream);
869         assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
870         outputStuffAsRef(ostream);
871         assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
872     }
873     assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");
874 
875     auto app4 = appender!(char[]);
876     {
877         auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
878         outputStuffAsVal(ostream);
879         assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
880         outputStuffAsVal(ostream);
881         assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
882     }
883     assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
884 
885     /* Test maxSize. */
886     auto app5 = appender!(char[]);
887     {
888         auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
889         assert(app5.data == "");
890 
891         ostream.append("1234567");  // Not flushed yet (no newline).
892         assert(app5.data == "");
893 
894         ostream.append("89012");    // Flushed by maxSize
895         assert(app5.data == "123456789012");
896 
897         ostream.put("1234567");     // Not flushed yet (no newline).
898         assert(app5.data == "123456789012");
899 
900         ostream.put("89012");       // Flushed by maxSize
901         assert(app5.data == "123456789012123456789012");
902 
903         ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
904         ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
905         assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
906     }
907     assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
908 }
909 
/**
isFlushableOutputRange is true when R is an output range for elements of type E
(char by default) and `r.flush`, for an `R r`, has a return type of void. It is
false in every other case, including when R has no flush member at all.
*/
template isFlushableOutputRange(R, E=char)
{
    static if (isOutputRange!(R, E))
    {
        /* The 'is' expression is false, rather than a compile error, when the
         * lambda does not compile because R has no usable flush member.
         */
        enum bool isFlushableOutputRange = is(ReturnType!((R r) => r.flush) == void);
    }
    else
    {
        enum bool isFlushableOutputRange = false;
    }
}
915 
@safe unittest
{
    import std.array;

    /* Live instances, so the trait is also exercised through typeof. */
    auto charApp = appender!(char[]);
    auto buffered = BufferedOutputRange!(typeof(charApp))(charApp, 5, 0, 10); // maxSize 10

    alias PlainAppender = typeof(charApp);
    alias WrappedAppender = typeof(buffered);

    /* A bare Appender is an output range, but it has no flush member. */
    static assert(isOutputRange!(PlainAppender, char));
    static assert(!isFlushableOutputRange!(PlainAppender, char));
    static assert(!isFlushableOutputRange!(PlainAppender));

    /* Wrapping it in a BufferedOutputRange adds the flush member. */
    static assert(isOutputRange!(WrappedAppender, char));
    static assert(isFlushableOutputRange!(WrappedAppender, char));
    static assert(isFlushableOutputRange!(WrappedAppender));

    /* Same checks with the types spelled out: string Appender. */
    static assert(isOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string));

    /* char[] Appender. */
    static assert(isOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[])));

    /* BufferedOutputRange over a char[] Appender. */
    static assert(isOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[]))));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
}
942 
/**
bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
reading a large buffer from the input stream rather than just a single line.

The file argument needs to be a File object open for reading, typically a filesystem
file or standard input. Use the Yes.keepTerminator template parameter to keep the
newline. This is similar to stdio.File.byLine, except specified as a template parameter
rather than a runtime parameter.

Reading in blocks does mean that input is not read until a full buffer is available or
end-of-file is reached. Reading each line as it is available can be enabled by setting
the lineBuffered parameter to Yes.lineBuffered. In this case bufferedByLine behaves
like stdio.File.byLine.

As a separate option, the first line of the file can be read as soon as it is available,
without waiting for a complete buffer. This is useful for reading the header line before
the rest of the data is available. Set the readHeader parameter to Yes.readHeader to get
this behavior.

Template_parameters:
    keepTerminator = Yes.keepTerminator to include the terminator in each line.
    Char = Element type of the lines returned, either char or ubyte.
    terminator = Line terminator byte. Used by both the block reads and the
        line-at-a-time reads (line-buffered mode and header line).
    readSize = Number of bytes requested from the file on each block read.
    growSize = Increment used when enlarging the buffer. Must satisfy
        0 < growSize <= readSize.

Params:
    file = An open File to read from.
    lineBuffered = Yes.lineBuffered to read one line at a time with File.readln.
    readHeader = Yes.readHeader to read only the first line with File.readln, even
        when the remainder of the file is read in blocks.

Returns:
    A class object with the input range primitives empty, front and popFront. The
    slice returned by front refers to the internal buffer, which is overwritten by
    later calls to popFront. Copy the line if it needs to be kept.
*/
auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
(File file, LineBuffered lineBuffered = No.lineBuffered, ReadHeader readHeader = No.readHeader)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer state variables
         *   - _buffer.length - Full length of allocated buffer.
         *   - _dataEnd - End of currently valid data (end of last read).
         *   - _lineStart - Start of current line.
         *   - _lineEnd - End of current line.
         */
        private File _file;
        private immutable LineBuffered _lineBuffered;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        /* Allocates the buffer and, unless the file is already at EOF, reads the
         * first line. The first line is read with readln when readHeader is set,
         * otherwise with the mode selected by lineBuffered.
         */
        this (File f, LineBuffered lineBuffered, ReadHeader readHeader)
        {
            _file = f;
            _lineBuffered = lineBuffered;
            _buffer = new ubyte[readSize + growSize];

            if (!_file.eof)
            {
                if (readHeader) popFrontLineBuffered();
                else popFront();
            }
        }

        /* True when the file is at EOF and no buffered data remains. */
        bool empty() const pure
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        /* The current line, as a slice of the internal buffer. The trailing
         * terminator is dropped unless keepTerminator is Yes.keepTerminator.
         */
        Char[] front() pure
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                return cast(Char[]) _buffer[_lineStart .. _lineEnd];
            }
            else
            {
                assert(_lineStart < _lineEnd);
                /* The last line of a file may not end with a terminator. */
                immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
                return cast(Char[]) _buffer[_lineStart .. end];
            }
        }

        /* Advances to the next line using the configured buffering mode. */
        void popFront()
        {
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            if (!_lineBuffered) popFrontFullBuffered();
            else popFrontLineBuffered();
        }

        /* Discards the current line and reads the next line with File.readln.
         * Intended for use when reading in line-buffered mode. However, it is
         * also used to read in the header line when in full-buffered mode.
         */
        private void popFrontLineBuffered()
        {
            assert(_lineEnd == _dataEnd);
            assert(!empty, "Attempt to popFront (LineBuffered) an empty bufferedByLine.");

            char[] line = cast(char[]) _buffer;
            _lineStart = 0;

            /* Pass the configured terminator so that line-at-a-time reads split on
             * the same byte as the block reads in popFrontFullBuffered. Leaving it
             * out makes readln fall back to '\n' regardless of the template argument.
             * NOTE(review): readln's treatment of terminator values above 0x7F has
             * not been checked here; confirm before relying on non-ASCII terminators.
             */
            _lineEnd = _dataEnd = _file.readln(line, cast(dchar) terminator);

            /* readln may have reallocated to fit a long line. Keep the larger buffer. */
            if (line.length > _buffer.length) _buffer = cast(ubyte[]) line;

            assert(_lineEnd == line.length);
            assert(_dataEnd == line.length);
        }

        /* Discards the current line and sets up the next one from the buffer,
         * reading further blocks from the file when the buffer holds no complete line.
         */
        private void popFrontFullBuffered()
        {
            import std.algorithm: copy, find;

            assert(!empty, "Attempt to popFront (Full Buffered) an empty bufferedByLine.");

            /* Pop the current line. */
            _lineStart = _lineEnd;

            /* Set up the next line if more data is available, either in the buffer or
             * the file. The next line ends at the next newline, if there is one.
             *
             * Notes:
             * - 'find' returns the slice starting with the character searched for, or
             *   an empty range if not found.
             * - _lineEnd is set to _dataEnd both when the current buffer does not have
             *   a newline and when it ends with one.
             */
            auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
            _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

            if (found.empty && !_file.eof)
            {
                /* No newline in current buffer. Read from the file until the next
                 * newline is found.
                 */
                assert(_lineEnd == _dataEnd);

                if (_lineStart > 0)
                {
                    /* Move remaining data to the start of the buffer. */
                    immutable remainingLength = _dataEnd - _lineStart;
                    copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
                    _lineStart = 0;
                    _lineEnd = _dataEnd = remainingLength;
                }

                do
                {
                    /* Grow the buffer if necessary, in growSize steps, until there
                     * is room for a full readSize block after the valid data.
                     */
                    immutable availableSize = _buffer.length - _dataEnd;
                    if (availableSize < readSize)
                    {
                        size_t growBy = growSize;
                        while (availableSize + growBy < readSize) growBy += growSize;
                        _buffer.length += growBy;
                    }

                    /* Read the next block. */
                    _dataEnd +=
                        _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
                        .length;

                    /* Only the newly read bytes need searching; _lineEnd is the
                     * previous end of data.
                     */
                    found = _buffer[_lineEnd .. _dataEnd].find(terminator);
                    _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;

                } while (found.empty && !_file.eof);
            }
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    return new BufferedByLineImpl(file, lineBuffered, readHeader);
}
1110 
1111 // BufferedByLine.
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Writes the same content to each of the given files. */
    void writeToAll(C)(string[] paths, C content)
    {
        foreach (path; paths)
        {
            auto fh = path.File("wb");
            fh.write(content);
            fh.close;
        }
    }

    /* Reads five files holding identical data in lockstep. The first is read with
     * File.byLine and is the reference. The other four are read with bufferedByLine
     * in each combination of full/line buffering and with/without header reading.
     * BufferArgs are the template arguments passed to bufferedByLine after
     * keepTerminator; it is empty to use the defaults.
     */
    void compareWithByLine(KeepTerminator keepTerm, BufferArgs...)(string[] paths)
    {
        auto fhA = paths[0].File();
        auto fhB = paths[1].File();
        auto fhC = paths[2].File();
        auto fhD = paths[3].File();
        auto fhE = paths[4].File();

        auto linesA = fhA.byLine(keepTerm);
        auto linesB = fhB.bufferedByLine!(keepTerm, BufferArgs)(No.lineBuffered);
        auto linesC = fhC.bufferedByLine!(keepTerm, BufferArgs)(Yes.lineBuffered);
        auto linesD = fhD.bufferedByLine!(keepTerm, BufferArgs)(No.lineBuffered, Yes.readHeader);
        auto linesE = fhE.bufferedByLine!(keepTerm, BufferArgs)(Yes.lineBuffered, Yes.readHeader);

        foreach (a, b, c, d, e;
                 lockstep(linesA, linesB, linesC, linesD, linesE, StoppingPolicy.requireSameLength))
        {
            assert(a == b);
            assert(a == c);
            assert(a == d);
            assert(a == e);
        }

        fhA.close;
        fhB.close;
        fhC.close;
        fhD.close;
        fhE.close;
    }

    /* Create five data files with the same data. Read each in parallel with byLine and
     * bufferedByLine and compare each line. bufferedByLine is run in both full buffered
     * and line buffered modes, with and without header reading.
     */
    auto data1 = appender!(char[])();

    foreach (n; 1 .. 1001) data1.put('\n');                 // Empty lines.
    foreach (n; 1 .. 1001) data1.put("a\n");                // Single character lines.
    foreach (n; 1 .. 1001)                                  // The numbers 1 to 1000.
    {
        data1.put(n.to!string);
        data1.put('\n');
    }
    foreach (len; 1 .. 1001)                                // Lines of length 1 to 1000.
    {
        foreach (k; 1 .. len + 1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    string file1c = buildPath(testDir, "file1c.txt");
    string file1d = buildPath(testDir, "file1d.txt");
    string file1e = buildPath(testDir, "file1e.txt");
    string[] fileSet1 = [file1a, file1b, file1c, file1d, file1e];

    writeToAll(fileSet1, data1.data);

    /* Default buffer sizes */
    static foreach (keepTerm; [No.keepTerminator, Yes.keepTerminator])
    {{
        compareWithByLine!(keepTerm)(fileSet1);
    }}

    /* Smaller read size. This will trigger buffer growth. */
    static foreach (keepTerm; [No.keepTerminator, Yes.keepTerminator])
    {{
        compareWithByLine!(keepTerm, char, '\n', 512, 256)(fileSet1);
    }}

    /* Exercise boundary cases in buffer growth. */
    static foreach (keepTerm; [No.keepTerminator, Yes.keepTerminator])
    {
        static foreach (readSize; [1, 2, 4])
        {
            static foreach (growSize; 1 .. readSize + 1)
            {{
                compareWithByLine!(keepTerm, char, '\n', readSize, growSize)(fileSet1);
            }}
        }
    }

    /* Files that do not end in a newline. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file2c = buildPath(testDir, "file2c.txt");
    string file2d = buildPath(testDir, "file2d.txt");
    string file2e = buildPath(testDir, "file2e.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    string file3c = buildPath(testDir, "file3c.txt");
    string file3d = buildPath(testDir, "file3d.txt");
    string file3e = buildPath(testDir, "file3e.txt");
    string[] fileSet2 = [file2a, file2b, file2c, file2d, file2e];
    string[] fileSet3 = [file3a, file3b, file3c, file3d, file3e];

    /* The first file set is overwritten; the larger data set is no longer needed. */
    writeToAll(fileSet1, "a");
    writeToAll(fileSet2, "ab");
    writeToAll(fileSet3, "abc");

    static foreach (keepTerm; [No.keepTerminator, Yes.keepTerminator])
    {
        static foreach (readSize; [1, 2, 4])
        {
            static foreach (growSize; 1 .. readSize + 1)
            {{
                compareWithByLine!(keepTerm, char, '\n', readSize, growSize)(fileSet1);
                compareWithByLine!(keepTerm, char, '\n', readSize, growSize)(fileSet2);
                compareWithByLine!(keepTerm, char, '\n', readSize, growSize)(fileSet3);
            }}
        }
    }
}
1370 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses of joinAppend have been replaced by BufferedOutputRange, which
has its own joinAppend method. However, joinAppend remains useful when constructing
internal buffers where BufferedOutputRange is not appropriate.

Params:
    inputRange = The elements to join. Either a range of E[] (e.g. strings joined
        with a char) or a range of E (e.g. ints joined with an int).
    outputRange = The output range receiving the joined elements. Passed by ref;
        output is appended to whatever it already holds.
    delimiter = The value written between consecutive elements.

Returns:
    outputRange, by value, after the elements have been appended. Nothing is
    appended when inputRange is empty.

---
int main(string[] args)
{
    import tsvutil;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* The alternatives are parenthesized as a group so that the input range
     * requirement applies to both. '&&' binds tighter than '||'.
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
    )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Each remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1444 
1445 // joinAppend.
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* Range of char[] joined with a char. The input must be left unchanged. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Range of string joined with a char. Output is appended to existing content.
     * The unchanged-input check is on svec, the range that was just joined.
     */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    /* An empty input range appends nothing, not even a delimiter. */
    string[] emptyVec;
    assert(emptyVec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    /* A single element is appended without a delimiter. */
    charAppender.clear;
    assert([s1].joinAppend(charAppender, '|').data == "abc");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of int[] joined with an int. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of int joined with an int, accumulating across calls. */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1494 
/**
getTsvFieldValue returns one field of a delimited text line, converted to type T.

It is a convenience for callers that need a single field only. When several fields
are needed it is more efficient to use std.algorithm.splitter directly or the
InputFieldReordering class.

The line is split on the delimiter character, the fields ahead of the requested
one are skipped, and the requested field is converted with std.conv.to.

Params:
    line = The text to split.
    fieldIndex = Zero-based index of the field wanted.
    delim = The field delimiter.

Returns:
    The field's value converted to T.

Throws:
    Exception if the line has too few fields. The message uses 1-upped field
    numbers, as would be provided by command line users. std.conv.to throws a
    std.conv.ConvException if conversion fails.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);
    size_t numSkipped = 0;

    /* Step over the fields preceding the one requested. */
    for (; numSkipped < fieldIndex && !fields.empty; ++numSkipped)
    {
        fields.popFront;
    }

    /* The usual case: the requested field is present. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero of an empty line. This is a workaround to a splitter special
     * case - If the input is empty, the returned split range is empty. This doesn't
     * properly represent a single column file. More correct mathematically, and for
     * this case, would be a single value representing an empty string. The input
     * line is a convenient source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1557 
// getTsvFieldValue.
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Multi-field lines shared by several of the checks below. */
    enum threeFields = "紅\t红\t99";
    enum fourFields = "红色和绿色\tred and green\t赤と緑\t10.5";

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int(threeFields, 2, '\t') == 99);
    assert(getTsvFieldValue!string(threeFields, 2, '\t') == "99");
    assert(getTsvFieldValue!string(threeFields, 1, '\t') == "红");
    assert(getTsvFieldValue!string(threeFields, 0, '\t') == "紅");
    assert(getTsvFieldValue!string(fourFields, 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double(fourFields, 3, '\t') == 10.5);

    /* The empty field cases, once with TAB and once with a colon as the delimiter. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. The same three-field line is supplied as
     * string, char[], dchar[] and wchar[]; each must produce identical results.
     */
    void checkFieldsOf(L)(L line)
    {
        assert(getTsvFieldValue!string(line, 0, '\t') == "orange and black");
        assert(getTsvFieldValue!string(line, 1, '\t') == "ნარინჯისფერი და შავი");
        assert(getTsvFieldValue!wstring(line, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
        assert(getTsvFieldValue!double(line, 2, '\t') == 88.5);
    }

    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    checkFieldsOf(stringLine);
    checkFieldsOf(charLine);
    checkFieldsOf(dcharLine);
    checkFieldsOf(wcharLine);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException. The
     * inner assertNotThrown turns a ConvException into an assertion failure; any
     * other exception passes through and satisfies the outer assertThrown.
     */
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1622 
/**
Yes|No.newlineWasRemoved is a template parameter to throwIfWindowsNewline. A Yes
value indicates the Unix newline was already removed, as might be done via
std.stdio.File.byLine or a similar mechanism.
*/
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsNewline throws an exception if the 'line' argument ends with a
Windows/DOS line ending. This is used by TSV Utilities tools to detect Windows/DOS
line endings and terminate processing with an error message to the user.

The 'nlWasRemoved' template parameter says whether the Unix newline character was
already removed from 'line'. If so (the default), the CR character from a Windows
CRLF is the last character remaining and is what gets detected. Otherwise the line
must end with the full CRLF pair. The latter is useful when reading files in binary
mode without stripping Unix newlines.

The 'filename' and 'lineNum' arguments are used only in the exception message. A
filename of "-" is reported as "Standard Input".
*/
void throwIfWindowsNewline
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    import std.format : format;

    /* The character sequence that identifies a Windows/DOS line ending, given
     * whether the trailing newline is still part of the line.
     */
    static if (nlWasRemoved) enum string windowsEnding = "\r";
    else enum string windowsEnding = "\r\n";

    /* Too short to contain the ending, or ends with something else: nothing to do. */
    if (line.length < windowsEnding.length) return;
    if (line[$ - windowsEnding.length .. $] != windowsEnding) return;

    const(char)[] sourceName = filename;
    if (filename == "-") sourceName = "Standard Input";

    throw new Exception(
        format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
               sourceName, lineNum));
}
1663 
// throwIfWindowsNewline
@safe unittest
{
    import std.algorithm : endsWith;
    import std.exception;

    /* Newline already removed (the default): only a trailing CR is an error. */
    foreach (i, text; ["", "a", "ab", "abc"])
    {
        assertNotThrown(throwIfWindowsNewline(text, "afile.tsv", i + 1));
    }

    foreach (i, text; ["\r", "a\r", "ab\r", "abc\r"])
    {
        assertThrown(throwIfWindowsNewline(text, "afile.tsv", i + 1));
    }

    /* Newline still present: only a trailing CRLF pair is an error. */
    foreach (i, text; ["\n", "a\n", "ab\n", "abc\n"])
    {
        assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)(text, "afile.tsv", i + 1));
    }

    foreach (i, text; ["\r\n", "a\r\n", "ab\r\n", "abc\r\n"])
    {
        assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)(text, "afile.tsv", i + 5));
    }

    /* Standard Input formatting. A filename of "-" must be reported to the user as
     * "Standard Input", in both template variants.
     */
    auto errNewlineRemoved = collectException(throwIfWindowsNewline("\r", "-", 99));
    assert(errNewlineRemoved !is null);
    assert(errNewlineRemoved.msg.endsWith("File: Standard Input, Line: 99"));

    auto errNewlinePresent =
        collectException(throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "-", 99));
    assert(errNewlinePresent !is null);
    assert(errNewlinePresent.msg.endsWith("File: Standard Input, Line: 99"));
}
1717 
/**
inputSourceRange is a convenience function for constructing an InputSourceRange.

The arguments are passed to the InputSourceRange constructor unchanged: 'filepaths'
is the list of input files to iterate over and 'readHeader' indicates whether the
header line of each file should be read when the file is opened.
*/
InputSourceRange inputSourceRange(string[] filepaths, ReadHeader readHeader)
{
    auto sources = new InputSourceRange(filepaths, readHeader);
    return sources;
}
1725 
/**
InputSourceRange is an input range that iterates over a set of input files.

An InputSourceRange is typically built from the files passed on the command line.
Each file is opened when it becomes the front element of the range and closed when
the range moves past it. The caller chooses whether header lines are read
automatically as files are opened.

The range is constructed from a list of filepaths. During iteration each filepath
is turned into an InputSource object. This mapping is what makes automatic file
opening, closing, and header reading possible.

The purpose of InputSourceRange is to give tools a standard way to examine the
header line of the first input file during command line argument processing, and
then hand the already opened file and its header line to the main processing
functions. Features such as named fields can then be implemented in a uniform way.

InputSourceRange and InputSource are both reference objects (classes). A range can
therefore only be iterated once. To iterate the files again, create a new
InputSourceRange from the same filepaths.

Files and standard input are the currently supported input sources. Other kinds of
input sources may be added in the future.
 */
final class InputSourceRange
{
    private string[] _filepaths;
    private immutable ReadHeader _readHeader;
    private InputSource _front;

    /** Creates the range and opens the first file, if there is one. The 'filepaths'
     * array is copied, the caller's array is not modified by iteration.
     */
    this(string[] filepaths, ReadHeader readHeader)
    {
        _filepaths = filepaths.dup;
        _readHeader = readHeader;
        openNextSource();
    }

    /** The number of input sources remaining, including the current front. */
    size_t length() const pure nothrow @safe
    {
        if (empty) return 0;
        return _filepaths.length + 1;
    }

    /** True when all input sources have been consumed. */
    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    /** The input source currently open for reading. */
    InputSource front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty InputSourceRange");
        return _front;
    }

    /** Closes the current input source and opens the next one, if any. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty InputSourceRange");

        _front.close;
        openNextSource();
    }

    /* Makes the next pending filepath the front element and opens it. When no
     * filepaths are left the front is cleared, which marks the range as empty.
     */
    private void openNextSource()
    {
        if (_filepaths.empty)
        {
            _front = null;
            return;
        }

        _front = new InputSource(_filepaths.front, _readHeader);
        _front.open;
        _filepaths.popFront;
    }
}
1803 
/**
InputSource is the element type produced by iterating over an InputSourceRange.

An InputSource object gives access to the open file that is currently the front
element of an InputSourceRange. The methods application code will most likely use:

$(LIST
    * `file()` - Returns the File object. The file is open for reading as long as
      the InputSource instance is the front element of the InputSourceRange it
      came from.

    * `header(KeepTerminator keepTerminator = No.keepTerminator)` - Returns the
      header line from the file. An empty string is returned if the
      InputSourceRange was created with readHeader=false.

    * `name()` - The name of the input source, intended for use in error messages
      shown to users. For files, this is the filepath that was passed to
      InputSourceRange. For standard input, it is "Standard Input".
)

An InputSource is a reference object, so all copies share the state of the
InputSourceRange front element. In particular, all copies see the same open state.

This class is not intended for use outside the context of an InputSourceRange.
*/
final class InputSource
{
    import std.stdio;

    private immutable string _filepath;
    private immutable bool _isStdin;
    private bool _isOpen;
    private ReadHeader _readHeader;
    private bool _hasBeenOpened;
    private string _header;
    private File _file;

    /* Records the filepath and settings. Nothing is opened until open() is called. */
    private this(string filepath, ReadHeader readHeader) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = (filepath == "-");
        _readHeader = readHeader;
        _isOpen = false;
        _hasBeenOpened = false;
    }

    /** file returns the File object held by the InputSource.
     *
     * The File is open for reading as long as the InputSource instance is the
     * front element of the InputSourceRange it came from.
     */
    File file() nothrow @safe
    {
        return _file;
    }

    /** isReadHeaderEnabled returns true if the header line is being read.
     */
    bool isReadHeaderEnabled() const pure nothrow @safe
    {
        return _readHeader == Yes.readHeader;
    }

    /** header returns the header line from the input file.
     *
     * A trailing newline is removed unless Yes.keepTerminator is passed. An empty
     * string is returned if the InputSourceRange was created with
     * readHeader=false.
     */
    string header(KeepTerminator keepTerminator = No.keepTerminator) const pure nothrow @safe
    {
        assert(_hasBeenOpened);

        immutable bool dropTerminator =
            keepTerminator != Yes.keepTerminator &&
            _header.length != 0 &&
            _header[$ - 1] == '\n';

        return dropTerminator ? _header[0 .. $ - 1] : _header;
    }

    /** isHeaderEmpty returns true if there is no data for a header, including the
     * terminator.
     *
     * When headers are being read, this is true only if the file is empty.
     */
    bool isHeaderEmpty() const pure nothrow @safe
    {
        assert(_hasBeenOpened);
        return _header.length == 0;
    }

    /** name returns a user friendly name representing the input source.
     *
     * For files, it is the filepath provided to InputSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * not name().)
     */
    string name() const pure nothrow @safe
    {
        if (_isStdin) return "Standard Input";
        return _filepath;
    }

    /** isStdin returns true if the input source is Standard Input, false otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the input source is open for reading, false otherwise.
     *
     * "Open" here refers to the state of the InputSource object itself: it is open
     * while it is the front element of the InputSourceRange that created it.
     *
     * For files, the underlying File object follows the same state. Standard input
     * is the exception, it is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the input source, reading the header line if header reading is enabled.
     * An InputSource can be opened only once.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        if (isStdin) _file = stdin;
        else _file = File(_filepath, "rb");

        if (isReadHeaderEnabled) _header = _file.readln();

        _hasBeenOpened = true;
        _isOpen = true;
    }

    /* Marks the input source closed. Real files are closed, standard input is not. */
    private void close()
    {
        if (!_isStdin) _file.close();
        _isOpen = false;
    }
}
1940 
// InputSourceRange and InputSource
//
// Writes four small files to a temporary directory, then iterates over prefixes of
// that file list with and without header reading. Checks open/closed state,
// header contents, names, and the data remaining in each file.
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    string file0 = buildPath(testDir, "file0.txt");
    string file1 = buildPath(testDir, "file1.txt");
    string file2 = buildPath(testDir, "file2.txt");
    string file3 = buildPath(testDir, "file3.txt");

    /* file0 is completely empty; file1 has a header but no body. */
    string file0Header = "";
    string file1Header = "file 1 header\n";
    string file2Header = "file 2 header\n";
    string file3Header = "file 3 header\n";

    string file0Body = "";
    string file1Body = "";
    string file2Body = "file 2 line 1\n";
    string file3Body = "file 3 line 1\nfile 3 line 2\n";

    string file0Data = file0Header ~ file0Body;
    string file1Data = file1Header ~ file1Body;
    string file2Data = file2Header ~ file2Body;
    string file3Data = file3Header ~ file3Body;

    /* Write the test files. The File temporaries are released at the end of each
     * statement, before the files are read back below.
     */
    {
        file0.File("wb").write(file0Data);
        file1.File("wb").write(file1Data);
        file2.File("wb").write(file2Data);
        file3.File("wb").write(file3Data);
    }

    /* Parallel arrays, indexed by file number. */
    auto inputFiles = [file0, file1, file2, file3];
    auto fileHeaders = [file0Header, file1Header, file2Header, file3Header];
    auto fileBodies = [file0Body, file1Body, file2Body, file3Body];
    auto fileData = [file0Data, file1Data, file2Data, file3Data];

    /* readSources accumulates every InputSource seen so far in the current
     * iteration, so the open state of earlier sources can be checked.
     */
    auto readSources = appender!(InputSource[]);
    auto buffer = new char[1024];    // Must be large enough to hold the test files.

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    /* Each pass uses the first numFiles files: 1 file, then 2, up to all 4. */
    foreach(numFiles; 1 .. inputFiles.length + 1)
    {
        /* Reading headers. */

        readSources.clear;
        auto inputSourcesYesHeader = inputSourceRange(inputFiles[0 .. numFiles], Yes.readHeader);
        assert(inputSourcesYesHeader.length == numFiles);

        foreach(fileNum, source; inputSourcesYesHeader.enumerate)
        {
            /* Only the current source is open; all earlier ones were closed. */
            readSources.put(source);
            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            assert(source.header(Yes.keepTerminator) == fileHeaders[fileNum]);

            /* Expected header without the newline. The empty file has no
             * terminator to remove.
             */
            auto headerNoTerminatorLength = fileHeaders[fileNum].length;
            if (headerNoTerminatorLength > 0) --headerNoTerminatorLength;
            assert(source.header(No.keepTerminator) ==
                   fileHeaders[fileNum][0 .. headerNoTerminatorLength]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(source.isReadHeaderEnabled);

            /* The header was already consumed, only the body remains in the file. */
            assert(source.file.rawRead(buffer) == fileBodies[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesYesHeader.empty);

        /* Without reading headers. */

        readSources.clear;
        auto inputSourcesNoHeader = inputSourceRange(inputFiles[0 .. numFiles], No.readHeader);
        assert(inputSourcesNoHeader.length == numFiles);

        foreach(fileNum, source; inputSourcesNoHeader.enumerate)
        {
            readSources.put(source);
            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            assert(source.header(Yes.keepTerminator).empty);
            assert(source.header(No.keepTerminator).empty);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(!source.isReadHeaderEnabled);

            /* No header was read, so the full file contents are still available. */
            assert(source.file.rawRead(buffer) == fileData[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesNoHeader.empty);
    }

    /* Tests with standard input. No actual reading in these tests.
     */

    readSources.clear;
    foreach(fileNum, source; inputSourceRange(["-", "-"], No.readHeader).enumerate)
    {
        readSources.put(source);
        assert(source.isOpen);
        assert(source.file.isOpen);
        assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));      // InputSource objects are "closed".
        assert(readSources.data[0 .. fileNum].all!(s => s.file.isOpen));  // Actual stdin should not be closed.
        assert(readSources.data[fileNum].isOpen);

        assert(source.header(Yes.keepTerminator).empty);
        assert(source.header(No.keepTerminator).empty);

        assert(source.name == "Standard Input");
        assert(source.isStdin);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = inputSourceRange(nofiles, No.readHeader);
        assert(sources.empty);
    }
    {
        auto sources = inputSourceRange(nofiles, Yes.readHeader);
        assert(sources.empty);
    }

    /* Error cases. A non-existent file must throw, whether it is the first file
     * (opened at construction) or a later one (opened during iteration).
     */
    assertThrown(inputSourceRange([file0, "no_such_file.txt"], No.readHeader).each);
    assertThrown(inputSourceRange(["no_such_file.txt", file1], Yes.readHeader).each);
}
2091 
/**
byLineSourceRange is a helper function for creating new ByLineSourceRange objects.

The template parameters (keepTerminator, Char, terminator) select the
ByLineSourceRange instantiation. The function arguments (filepaths, lineBuffered,
readHeader) are passed to its constructor unchanged.
*/
auto byLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
(string[] filepaths, LineBuffered lineBuffered = No.lineBuffered,
 ReadHeader readHeader = No.readHeader)
if (is(Char == char) || is(Char == ubyte))
{
    alias RangeType = ByLineSourceRange!(keepTerminator, Char, terminator);
    return new RangeType(filepaths, lineBuffered, readHeader);
}
2104 
/**
ByLineSourceRange is an input range that iterates over a set of input files. It
provides bufferedByLine access to each file.

A ByLineSourceRange is used to iterate over a set of files passed on the command line.
Files are automatically opened and closed during iteration. The front element of the
range provides access to a bufferedByLine for iterating over the lines in the file.

The range is created from a set of filepaths. These filepaths are mapped to
ByLineSource objects during the iteration. This is what enables automatically opening
and closing files and providing bufferedByLine access.

The motivation behind ByLineSourceRange is to provide a standard way to look at the
header line of the first input file during command line argument processing, and then
pass the open input file along to the main processing functions. This enables
features like named fields to be implemented in a standard way.

Access to the first line of the first file is available after creating the
ByLineSourceRange instance. The first file is opened and a bufferedByLine created.
The first line of the first file is available via byLine.front (after checking
!byLine.empty).

Buffering is handled by bufferedByLine. Full buffering is used by default, this can be
changed to line buffering by Yes.lineBuffered. When using full buffering, the header
line (first line) of the first file can be read as soon as available using
Yes.readHeader. This is only done for the first file, as that is when immediate
processing is useful.

The template parameters (keepTerminator, Char, terminator) are forwarded to the
ByLineSource element type. Char must be either char or ubyte.

Both ByLineSourceRange and ByLineSource are reference objects. This keeps their use
limited to a single iteration over the set of files. The files can be iterated again
by creating a new ByLineSourceRange against the same filepaths.

Currently, ByLineSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.
 */
final class ByLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    /* Forward the Char template parameter, rather than a fixed 'char', so the
     * element type follows the character type this range was instantiated with.
     */
    alias ByLineSourceType = ByLineSource!(keepTerminator, Char, terminator);

    private string[] _filepaths;
    private immutable LineBuffered _lineBuffered;
    private ByLineSourceType _front;

    /** Creates the range and opens the first file, if there is one. The 'filepaths'
     * array is copied. The 'readHeader' setting is applied to the first file only.
     */
    this(string[] filepaths, LineBuffered lineBuffered = No.lineBuffered,
         ReadHeader readHeader = No.readHeader)
    {
        _filepaths = filepaths.dup;
        _lineBuffered = lineBuffered;
        _front = null;

        if (!_filepaths.empty)
        {
            _front = new ByLineSourceType(_filepaths.front, _lineBuffered, readHeader);
            _front.open;
            _filepaths.popFront;
        }
    }

    /** The number of input sources remaining, including the current front. */
    size_t length() const pure nothrow @safe
    {
        return empty ? 0 : _filepaths.length + 1;
    }

    /** True when all input sources have been consumed. */
    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    /** The ByLineSource currently open for reading. */
    ByLineSourceType front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty ByLineSourceRange");
        return _front;
    }

    /** Closes the current input source and opens the next one, if any. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty ByLineSourceRange");

        _front.close;

        if (!_filepaths.empty)
        {
            /* readHeader is deliberately not passed: early header reading is only
             * done for the first file, which is opened by the constructor.
             */
            _front = new ByLineSourceType(_filepaths.front, _lineBuffered);
            _front.open;
            _filepaths.popFront;
        }
        else
        {
            _front = null;
        }
    }
}
2197 
/**
ByLineSource is a class of objects produced by iterating over a ByLineSourceRange.

A ByLineSource instance provides a bufferedByLine range for the current front
element of a ByLineSourceRange. The main methods application code is likely to
need are:

$(LIST
    * `byLine()` - Returns the bufferedByLine range accessing the open file. The file
       will be open for reading (using the bufferedByLine range) as long as the
       ByLineSource instance is the front element of the ByLineSourceRange
       it came from.

    * `name()` - The name of the input source. The name returned is intended for
      user error messages. For files, this is the filepath that was passed to
      ByLineSourceRange. For standard input, it is "Standard Input".
)

The template parameters (keepTerminator, Char, terminator) are forwarded to
bufferedByLine. Char must be either char or ubyte.

A ByLineSource is a reference object, so the copies have the same state as the
ByLineSourceRange front element. In particular, all copies will have the open
state of the front element of the ByLineSourceRange.

This class is not intended for use outside the context of a ByLineSourceRange.
*/
final class ByLineSource(
    KeepTerminator keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.stdio;
    import std.traits : ReturnType;

    /* Forward the Char template parameter, rather than a fixed 'char', so the
     * bufferedByLine range follows the character type this class was
     * instantiated with.
     */
    alias newByLineFn = bufferedByLine!(keepTerminator, Char, terminator);
    alias ByLineType = ReturnType!newByLineFn;

    private immutable string _filepath;
    private immutable LineBuffered _lineBuffered;
    private immutable ReadHeader _readHeader;
    private immutable bool _isStdin;
    private bool _isOpen;
    private bool _hasBeenOpened;
    private File _file;
    private ByLineType _byLineRange;

    /* Records the filepath and settings. Nothing is opened until open() is called. */
    private this(string filepath, LineBuffered lineBuffered = No.lineBuffered,
                ReadHeader readHeader = No.readHeader) pure nothrow @safe
    {
        _filepath = filepath;
        _lineBuffered = lineBuffered;
        _readHeader = readHeader;
        _isStdin = filepath == "-";
        _isOpen = false;
        _hasBeenOpened = false;
    }

    /** byLine returns the bufferedByLine object held by the ByLineSource instance.
     *
     * The File underlying the BufferedByLine object is open for reading as long as
     * the ByLineSource instance is the front element of the ByLineSourceRange it
     * came from.
     */
    ByLineType byLine() nothrow @safe
    {
        return _byLineRange;
    }

    /** name returns a user friendly name representing the underlying input source.
     *
     * For files, it is the filepath provided to ByLineSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * do not compare against name().)
     */
    string name() const pure nothrow @safe
    {
        return _isStdin ? "Standard Input" : _filepath;
    }

    /** isStdin returns true if the underlying input source is Standard Input, false
     * otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the ByLineSource instance is open for reading, false
     * otherwise.
     *
     * "Open" in this context is whether the ByLineSource object is currently "open".
     * The underlying input source backing it does not necessarily have the same
     * state. The ByLineSource instance is "open" if it is the front element of the
     * ByLineSourceRange that created it.
     *
     * The underlying input source object follows the same open/close state as makes
     * sense. In particular, real files are closed when the ByLineSource object is
     * closed. The exception is standard input, which is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the input source and creates the bufferedByLine range over it. A
     * ByLineSource can be opened only once.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        _file = isStdin ? stdin : _filepath.File("rb");
        _byLineRange = newByLineFn(_file, _lineBuffered, _readHeader);
        _isOpen = true;
        _hasBeenOpened = true;
    }

    /* Marks the source closed. Real files are closed, standard input is not. */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
2316 
// ByLineSourceRange and ByLineSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_byline_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    string file0 = buildPath(testDir, "file0.txt");
    string file1 = buildPath(testDir, "file1.txt");
    string file2 = buildPath(testDir, "file2.txt");
    string file3 = buildPath(testDir, "file3.txt");

    /* Test data: file0 is empty, file1 has only a header line, file2 and file3
     * have a header line followed by one and two body lines respectively.
     */
    string file0Header = "";
    string file1Header = "file 1 header\n";
    string file2Header = "file 2 header\n";
    string file3Header = "file 3 header\n";

    string file0Body = "";
    string file1Body = "";
    string file2Body = "file 2 line 1\n";
    string file3Body = "file 3 line 1\nfile 3 line 2\n";

    string file0Data = file0Header ~ file0Body;
    string file1Data = file1Header ~ file1Body;
    string file2Data = file2Header ~ file2Body;
    string file3Data = file3Header ~ file3Body;

    /* Write the test files. The File temporaries go out of scope with the block. */
    {
        file0.File("wb").write(file0Data);
        file1.File("wb").write(file1Data);
        file2.File("wb").write(file2Data);
        file3.File("wb").write(file3Data);
    }

    /* Parallel arrays, indexed by file number. */
    auto inputFiles = [file0, file1, file2, file3];
    auto fileHeaders = [file0Header, file1Header, file2Header, file3Header];
    auto fileData = [file0Data, file1Data, file2Data, file3Data];

    /* Test without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    /* Run every combination of terminator handling, line buffering, header
     * reading, and number of input files (the first 1, 2, 3, or 4 files).
     */
    static foreach (keepTerm; [No.keepTerminator, Yes.keepTerminator])
    {
        foreach (lineBuf; [No.lineBuffered, Yes.lineBuffered])
        {
            foreach (readHdr; [No.readHeader, Yes.readHeader])
            {
                foreach(numFiles; 1 .. inputFiles.length + 1)
                {
                    /* Every source seen so far is retained so the open/closed
                     * state of earlier sources can be checked later.
                     */
                    auto readSources = appender!(ByLineSource!(keepTerm)[]);
                    auto inputSources = byLineSourceRange!(keepTerm)(inputFiles[0 .. numFiles], lineBuf, readHdr);
                    assert(inputSources.length == numFiles);

                    foreach(fileNum, source; inputSources.enumerate)
                    {
                        readSources.put(source);

                        /* Only the current (front) source is open. */
                        assert(source.isOpen);
                        assert(source._file.isOpen);
                        assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
                        assert(readSources.data[fileNum].isOpen);

                        /* Without keepTerminator the header line is compared
                         * without its trailing newline.
                         */
                        auto headerLength = fileHeaders[fileNum].length;
                        static if (!keepTerm)
                        {
                            if (headerLength > 0) --headerLength;
                        }

                        /* The first available line, if any, is the header line. */
                        assert(source.byLine.empty ||
                               source.byLine.front == fileHeaders[fileNum][0 .. headerLength]);

                        assert(source.name == inputFiles[fileNum]);
                        assert(!source.isStdin);

                        /* Reassemble the file contents from the lines read,
                         * restoring the newline when terminators are dropped.
                         */
                        auto readFileData = appender!(char[]);
                        foreach(line; source.byLine)
                        {
                            readFileData.put(line);
                            static if (!keepTerm) readFileData.put('\n');
                        }

                        assert(readFileData.data == fileData[fileNum]);
                    }

                    /* The ByLineSourceRange is a reference range, consumed by the foreach. */
                    assert(inputSources.empty);
                }
            }
        }
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = byLineSourceRange!(No.keepTerminator)(nofiles);
        assert(sources.empty);
    }
    {
        auto sources = byLineSourceRange!(Yes.keepTerminator)(nofiles);
        assert(sources.empty);
    }

    /* Error cases: iterating over a file list naming a nonexistent file throws. */
    assertThrown(byLineSourceRange!(No.keepTerminator)([file0, "no_such_file.txt"]).each);
    assertThrown(byLineSourceRange!(Yes.keepTerminator)(["no_such_file.txt", file1]).each);
}
2434 
/** Compile-time test for the 'bufferable' input sources accepted by
 * inputSourceByChunk.
 *
 * A type qualifies if it is a file handle (std.stdio.File, ignoring qualifiers)
 * or an input range whose element encoding type, ignoring qualifiers, is ubyte.
 * Mutable dynamic ubyte arrays are such input ranges.
 *
 * Static, const, and immutable arrays are not input ranges themselves, but can be
 * sliced to turn them into input ranges.
 *
 * Note: The element types could easily be generalized much further if that were
 * useful. At present, the primary purpose of inputSourceByChunk is to have a range
 * representing a buffered file that can also take ubyte arrays as sources for unit
 * testing.
 */
template isBufferableInputSource(R)
{
    static if (isFileHandle!(Unqual!R))
    {
        enum bool isBufferableInputSource = true;
    }
    else
    {
        enum bool isBufferableInputSource =
            isInputRange!R && is(Unqual!(ElementEncodingType!R) == ubyte);
    }
}
2450 
@safe unittest
{
    import std.stdio : stdin;

    /* File handles and mutable ubyte arrays qualify, character arrays do not. */
    static assert(isBufferableInputSource!(typeof(stdin)));
    static assert(isBufferableInputSource!(File));
    static assert(isBufferableInputSource!(ubyte[]));
    static assert(!isBufferableInputSource!(string));
    static assert(!isBufferableInputSource!(char[]));

    /* Static arrays in every qualifier combination. None is bufferable as is,
     * each becomes bufferable when sliced, and each has ubyte elements.
     */
    ubyte[10] staticArray;
    const(ubyte)[1] staticArrayConstElts;
    immutable(ubyte)[1] staticArrayImmutableElts;
    const ubyte[1] staticConstArray;
    immutable ubyte[1] staticImmutableArray;

    static assert(!isBufferableInputSource!(typeof(staticArray)));
    static assert(isBufferableInputSource!(typeof(staticArray[])));
    static assert(is(Unqual!(ElementType!(typeof(staticArray))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(staticArrayConstElts)));
    static assert(isBufferableInputSource!(typeof(staticArrayConstElts[])));
    static assert(is(Unqual!(ElementType!(typeof(staticArrayConstElts))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(staticArrayImmutableElts)));
    static assert(isBufferableInputSource!(typeof(staticArrayImmutableElts[])));
    static assert(is(Unqual!(ElementType!(typeof(staticArrayImmutableElts))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(staticConstArray)));
    static assert(isBufferableInputSource!(typeof(staticConstArray[])));
    static assert(is(Unqual!(ElementType!(typeof(staticConstArray))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(staticImmutableArray)));
    static assert(isBufferableInputSource!(typeof(staticImmutableArray[])));
    static assert(is(Unqual!(ElementType!(typeof(staticImmutableArray))) == ubyte));

    /* Dynamic arrays. Those with a mutable array reference are bufferable
     * directly, whatever the element qualifier. Fully const or immutable arrays
     * are not, but become bufferable when sliced.
     */
    ubyte[] dynamicArray = new ubyte[](10);
    const(ubyte)[] dynamicArrayConstElts = new ubyte[](10);
    immutable(ubyte)[] dynamicArrayImmutableElts = new ubyte[](10);
    const ubyte[] dynamicConstArray = new ubyte[](10);
    immutable ubyte[] dynamicImmutableArray = new ubyte[](10);

    static assert(isBufferableInputSource!(typeof(dynamicArray)));
    static assert(isBufferableInputSource!(typeof(dynamicArray[])));
    static assert(is(Unqual!(ElementType!(typeof(dynamicArray))) == ubyte));

    static assert(isBufferableInputSource!(typeof(dynamicArrayConstElts)));
    static assert(isBufferableInputSource!(typeof(dynamicArrayConstElts[])));
    static assert(is(Unqual!(ElementType!(typeof(dynamicArrayConstElts))) == ubyte));

    static assert(isBufferableInputSource!(typeof(dynamicArrayImmutableElts)));
    static assert(isBufferableInputSource!(typeof(dynamicArrayImmutableElts[])));
    static assert(is(Unqual!(ElementType!(typeof(dynamicArrayImmutableElts))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(dynamicConstArray)));
    static assert(isBufferableInputSource!(typeof(dynamicConstArray[])));
    static assert(is(Unqual!(ElementType!(typeof(dynamicConstArray))) == ubyte));

    static assert(!isBufferableInputSource!(typeof(dynamicImmutableArray)));
    static assert(isBufferableInputSource!(typeof(dynamicImmutableArray[])));
    static assert(is(Unqual!(ElementType!(typeof(dynamicImmutableArray))) == ubyte));

    /* User-defined ranges: a minimal input range of ubyte ... */
    struct S1
    {
        void popFront();
        @property bool empty();
        @property ubyte front();
    }

    /* ... and a range of ubyte that also offers save, length, and slicing. */
    struct S2
    {
        @property ubyte front();
        void popFront();
        @property bool empty();
        @property auto save() { return this; }
        @property size_t length();
        S2 opSlice(size_t, size_t);
    }

    static assert(isInputRange!S1);
    static assert(isBufferableInputSource!S1);

    static assert(isInputRange!S2);
    static assert(is(ElementEncodingType!S2 == ubyte));
    static assert(hasSlicing!S2);
    static assert(isBufferableInputSource!S2);

    /* For code coverage. */
    S2 s2;
    auto savedCopy = s2.save;

    /* Library ranges: the element type decides, finite or infinite. */
    auto repeatInt = repeat!int(7, 5);
    auto repeatUbyte = repeat!ubyte(7, 5);
    auto infiniteUbyte = repeat!ubyte(7);

    static assert(!isBufferableInputSource!(typeof(repeatInt)));
    static assert(isBufferableInputSource!(typeof(repeatUbyte)));
    static assert(isBufferableInputSource!(typeof(infiniteUbyte)));
}
2548 
/** inputSourceByChunk returns a range that reads either a file handle (File) or a
 * ubyte input range a chunk at a time.
 *
 * This is a cover for File.byChunk that allows passing an in-memory array or input
 * range as well. At present the motivation is primarily to enable unit testing of
 * chunk-based algorithms using in-memory strings.
 *
 * inputSourceByChunk takes either a File open for reading or an input range with
 * ubyte elements. Data is read a buffer at a time. The buffer can be user provided,
 * or allocated by inputSourceByChunk based on a caller provided buffer size.
 *
 * The primary motivation for supporting both files and input ranges as sources is to
 * enable unit testing of buffer based algorithms using in-memory arrays. Dynamic,
 * mutable arrays are fine. Use slicing to turn a static, const, or immutable array
 * into an input range.
 *
 * The chunks are returned as an input range.
 *
 * Both overloads carry the same template constraint, so a source that is not
 * bufferable is rejected at the call site.
 *
 * Params:
 *   source = A File or an input range with ubyte elements
 *            (see isBufferableInputSource).
 *   size   = Size of the buffer to allocate (overload taking a size). The
 *            input-range path enforces a buffer length larger than zero.
 *   buffer = Caller provided buffer used to hold each chunk (overload taking a
 *            buffer).
 *
 * Returns: An input range of ubyte[] chunks.
 */
auto inputSourceByChunk(InputSource)(InputSource source, size_t size)
if (isBufferableInputSource!InputSource)
{
    return inputSourceByChunk(source, new ubyte[](size));
}
2571 
/// Ditto
auto inputSourceByChunk(InputSource)(InputSource source, ubyte[] buffer)
if (isBufferableInputSource!InputSource)
{
    static if (!isFileHandle!(Unqual!InputSource))
    {
        /* Input range over a ubyte range: each element is the caller's buffer
         * filled with the next chunk of the source. The same buffer is reused for
         * every chunk.
         */
        static struct BufferedChunk
        {
            private Chunks!InputSource _chunks;
            private ubyte[] _buffer;

            /* Rejects an empty buffer, then loads the first chunk. */
            this(InputSource source, ubyte[] buffer)
            {
                import std.exception : enforce;

                enforce(buffer.length > 0, "buffer size must be larger than 0");
                _buffer = buffer;
                _chunks = chunks(source, buffer.length);
                readNextChunk();
            }

            /* Copies the next chunk of the source into the buffer. A zero-length
             * buffer marks the end of the input.
             */
            private void readNextChunk()
            {
                import std.algorithm : copy;

                if (_chunks.empty)
                {
                    _buffer.length = 0;
                    return;
                }

                auto nextChunk = _chunks.front.take(_buffer.length);
                auto unfilled = copy(nextChunk, _buffer);
                _chunks.popFront;

                /* Only the last chunk should be shorter than the buffer. */
                assert(unfilled.length == 0 || _chunks.empty);

                /* Trim the buffer to the number of bytes actually copied. */
                _buffer.length -= unfilled.length;
            }

            @property bool empty()
            {
                return _buffer.length == 0;
            }

            @property ubyte[] front()
            {
                assert(!empty, "Attempting to fetch the front of an empty inputSourceByChunks");
                return _buffer;
            }

            void popFront()
            {
                assert(!empty, "Attempting to popFront an empty inputSourceByChunks");
                readNextChunk();
            }
        }

        return BufferedChunk(source, buffer);
    }
    else
    {
        /* File handles are read with File.byChunk, using the caller's buffer. */
        return source.byChunk(buffer);
    }
}
2636 
unittest  // inputSourceByChunk
{
    import tsv_utils.common.unittest_utils;   // tsv-utils unit test helpers
    import std.file : mkdir, rmdirRecurse;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_inputSourceByChunk");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : equal, joiner;
    import std.format;
    import std.string : representation;

    /* Test data mixes one-, two-, and three-byte UTF-8 sequences. It is processed
     * as raw bytes.
     */
    auto charData = "abcde,ßÀß,あめりか物語,012345";
    ubyte[] ubyteData = charData.dup.representation;

    ubyte[1024] rawBuffer;  // Must be larger than largest bufferSize in tests.

    /* Writes the bytes to a new file at filePath. */
    void writeFileData(string filePath, ubyte[] data)
    {
        import std.stdio;

        auto f = filePath.File("wb");
        f.rawWrite(data);
        f.close;
    }

    /* For each prefix of the test data, and each buffer size from one byte up to
     * one byte more than the data, read the data back in chunks and check that
     * the concatenated chunks equal the data.
     *
     * NOTE(review): the upper bound is exclusive, so the complete ubyteData array
     * is never used as a data set - confirm whether that is intended.
     */
    foreach (size_t dataSize; 0 .. ubyteData.length)
    {
        auto data = ubyteData[0 .. dataSize];
        auto filePath = buildPath(testDir, format("data_%d.txt", dataSize));
        writeFileData(filePath, data);

        foreach (size_t bufferSize; 1 .. dataSize + 2)
        {
            /* Test-A: in-memory array, buffer allocated from a size. */
            assert(data.inputSourceByChunk(bufferSize).joiner.equal(data),
                   format("[Test-A] dataSize: %d, bufferSize: %d", dataSize, bufferSize));

            assert (rawBuffer.length >= bufferSize);

            /* Test-B: in-memory array, caller provided buffer. */
            ubyte[] buffer = rawBuffer[0 .. bufferSize];
            assert(data.inputSourceByChunk(buffer).joiner.equal(data),
                   format("[Test-B] dataSize: %d, bufferSize: %d", dataSize, bufferSize));

            /* Test-C: file, buffer allocated from a size. */
            {
                auto inputStream = filePath.File;
                assert(inputStream.inputSourceByChunk(bufferSize).joiner.equal(data),
                       format("[Test-C] dataSize: %d, bufferSize: %d", dataSize, bufferSize));
                inputStream.close;
            }

            /* Test-D: file, caller provided buffer. */
            {
                auto inputStream = filePath.File;
                assert(inputStream.inputSourceByChunk(buffer).joiner.equal(data),
                       format("[Test-D] dataSize: %d, bufferSize: %d", dataSize, bufferSize));
                inputStream.close;
            }
        }
    }
}
2697 
@safe unittest // inputSourceByChunk array cases
{
    import std.algorithm : equal;

    /* Every array holds the same five bytes, so reading two bytes at a time
     * always produces the same three chunks.
     */
    auto expectedChunks = [[5, 6], [7, 8], [9]];

    ubyte[5] staticArray = [5, 6, 7, 8, 9];
    const(ubyte)[5] staticArrayConstElts = [5, 6, 7, 8, 9];
    immutable(ubyte)[5] staticArrayImmutableElts = [5, 6, 7, 8, 9];
    const ubyte[5] staticConstArray = [5, 6, 7, 8, 9];
    immutable ubyte[5] staticImmutableArray = [5, 6, 7, 8, 9];

    ubyte[] dynamicArray = [5, 6, 7, 8, 9];
    const(ubyte)[] dynamicArrayConstElts = [5, 6, 7, 8, 9];
    immutable(ubyte)[] dynamicArrayImmutableElts = [5, 6, 7, 8, 9];
    const ubyte[] dynamicConstArray = [5, 6, 7, 8, 9];
    immutable ubyte[] dynamicImmutableArray = [5, 6, 7, 8, 9];

    /* Dynamic arrays with a mutable array reference are passed as they are. */
    assert(equal(inputSourceByChunk(dynamicArray, 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicArrayConstElts, 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicArrayImmutableElts, 2), expectedChunks));

    /* Slicing makes every array form usable: first the static arrays ... */
    assert(equal(inputSourceByChunk(staticArray[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(staticArrayConstElts[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(staticArrayImmutableElts[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(staticConstArray[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(staticImmutableArray[], 2), expectedChunks));

    /* ... then the dynamic arrays. */
    assert(equal(inputSourceByChunk(dynamicArray[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicArrayConstElts[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicArrayImmutableElts[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicConstArray[], 2), expectedChunks));
    assert(equal(inputSourceByChunk(dynamicImmutableArray[], 2), expectedChunks));
}
2731 
@safe unittest // inputSourceByChunk input ranges
{
    import std.algorithm : equal;

    /* A finite range of five ubyte sevens, read with buffer sizes from one byte
     * up to one byte more than the range length.
     */
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 1), [[7], [7], [7], [7], [7]]));
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 2), [[7, 7], [7, 7], [7]]));
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 3), [[7, 7, 7], [7, 7]]));
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 4), [[7, 7, 7, 7], [7]]));
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 5), [[7, 7, 7, 7, 7]]));
    assert(equal(inputSourceByChunk(repeat!ubyte(7, 5), 6), [[7, 7, 7, 7, 7]]));

    /* Infinite. Only the first three chunks are taken. */
    auto firstThreeChunks = take(inputSourceByChunk(repeat!ubyte(7), 2), 3);
    assert(equal(firstThreeChunks, [[7, 7], [7, 7], [7, 7]]));
}