tsv_utils.common.utils source code

1 /**
2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
3 and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
8       an input line. Fields in the subset are accessed by array indices. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [isFlushableOutputRange] - Tests if something is an OutputRange with a flush
16       member.
17 
18     * [bufferedByLine] - An input range that reads from a File handle line by line.
19       It is similar to the standard library method std.stdio.File.byLine, but quite a
20       bit faster. This is achieved by reading in larger blocks and buffering.
21 
22     * [InputSourceRange] - An input range that provides open file access to a set of
23       files. It is used to iterate over files passed as command line arguments. This
24       enables reading the header line of a file during command line argument
25       processing, then passing the open file to the main processing functions.
26 
27     * [ByLineSourceRange] - Similar to an InputSourceRange, except that it provides
28       access to a byLine iterator (bufferedByLine) rather than an open file. This is
29       used by tools that run the same processing logic on both header and non-header lines.
30 
31     * [joinAppend] - A function that performs a join, but appending the join output to
32       an output stream. It is a performance improvement over using join or joiner with
33       writeln.
34 
35     * [getTsvFieldValue] - A convenience function when only a single value is needed
36       from an input line.
37 
38     * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds for detecting
39       Windows newlines in input.
40 )
41 
42 Copyright (c) 2015-2020, eBay Inc.
43 Initially written by Jon Degenhardt
44 
45 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
46 */
47 
48 module tsv_utils.common.utils;
49 
50 import std.range;
51 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned, ReturnType;
52 import std.typecons : Flag, No, Yes;
53 
54 // InputFieldReordering class.
55 
/** Flag used by the InputFieldReordering template. */
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;   // Reordered fields; slices of caller-owned data.
    private TupleFromTo[] fromToMap; // Full input-index to output-index map, sorted.
    private TupleFromTo[] mapStack;  // Portion of fromToMap not yet matched on this line.

    /** Builds the input-to-output mapping.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = Offset added to every output position. The output buffer holds
     *           `start + inputFieldIndicies.length` entries so that every mapped
     *           position is a valid index. Entries below `start` are never written
     *           by this class.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output positions run from start to start + length - 1, so the buffer must
         * include the offset. Sizing it to length alone overruns when start > 0.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Sorting by 'from' (then 'to') lets processNextField consume the map front
         * to back as fields arrive in input order.
         */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* With partial lines enabled, entries left unfilled must read as empty. */
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the
     * outputFields array.
     *
     * processNextField should be called once for each field on the line, in the order
     * found. The processing of the line can terminate once allFieldsFilled returns
     * true.
     *
     * The return value is the number of output fields the input field maps to. Zero
     * means the field is not mapped to the output fields array.
     *
     * If, prior to allFieldsFilled returning true, any fields on the input line
     * are not passed to processNextField, the caller should either ensure the fields
     * are not part of the output fields or have partial lines enabled.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;
        /* A field listed several times has consecutive entries in the sorted map. */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
202 
// InputFieldReordering - Tests using different character types.
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] fields_2_0 = [2, 0];

    auto expected_2_0 = [["r1f2",   "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456",    "r3f0"]];

    /* Runs the full check for one character type. Fields 2 and 0 are requested, so
     * the reordering is complete exactly when the field at index 2 has been seen.
     */
    void checkCharType(CharT)()
    {
        CharT[][][] expected = to!(CharT[][][])(expected_2_0);
        auto reordering = new InputFieldReordering!CharT(fields_2_0);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(CharT[])(fieldValue));
                assert ((fieldIndex >= 2) == reordering.allFieldsFilled);
            }

            assert(reordering.allFieldsFilled);
            assert(reordering.outputFields == expected[lineIndex]);
        }
    }

    checkCharType!char();
    checkCharType!wchar();
    checkCharType!dchar();
}
252 
// InputFieldReordering - Test of partial line support.
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] fields_2_0 = [2, 0];

    /* Snapshot of the output fields expected after each field of each line has been
     * processed. Indexed as [line][field][output position].
     */
    char[][][][] snapshots = to!(char[][][][])(
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ]);

    auto reordering = new InputFieldReordering!(char, EnablePartialLines.yes)(fields_2_0);

    foreach (lineIndex, line; inputLines)
    {
        reordering.initNewLine;

        foreach (fieldIndex, fieldValue; line)
        {
            char[] field = to!(char[])(fieldValue);
            reordering.processNextField(fieldIndex, field);

            /* Partial lines are enabled, so the output is inspected after every field. */
            assert(reordering.outputFields == snapshots[lineIndex][fieldIndex]);
        }
    }
}
286 
// InputFieldReordering - Field combination tests.
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* One entry per field selection: the requested input fields and the output
     * expected for each of the three input lines.
     */
    static struct Combination
    {
        size_t[] fields;
        string[][] expected;
    }

    Combination[] combinations =
        [
            Combination([0], [["00"],
                              ["10"],
                              ["20"]]),
            Combination([3], [["03"],
                              ["13"],
                              ["23"]]),
            Combination([0, 1], [["00", "01"],
                                 ["10", "11"],
                                 ["20", "21"]]),
            Combination([1, 0], [["01", "00"],
                                 ["11", "10"],
                                 ["21", "20"]]),
            Combination([0, 3], [["00", "03"],
                                 ["10", "13"],
                                 ["20", "23"]]),
            Combination([3, 0], [["03", "00"],
                                 ["13", "10"],
                                 ["23", "20"]]),
            Combination([0, 1, 2, 3], [["00", "01", "02", "03"],
                                       ["10", "11", "12", "13"],
                                       ["20", "21", "22", "23"]]),
            Combination([3, 2, 1, 0], [["03", "02", "01", "00"],
                                       ["13", "12", "11", "10"],
                                       ["23", "22", "21", "20"]]),
            Combination([0, 3, 0, 0, 1], [["00", "03", "00", "00", "01"],
                                          ["10", "13", "10", "10", "11"],
                                          ["20", "23", "20", "20", "21"]]),
        ];

    foreach (ref combination; combinations)
    {
        auto reordering = new InputFieldReordering!char(combination.fields);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            assert(reordering.outputFields == to!(char[][])(combination.expected[lineIndex]));
        }
    }
}
389 
390 
391 import std.stdio : File, isFileHandle, KeepTerminator;
392 import std.range : isOutputRange;
393 import std.traits : Unqual;
394 
/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.

BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Constructor.
     *
     * Params:
     *   outputTarget = The File or output range that receives flushed data.
     *   flushSize = Buffer length at which newline-boundary checks trigger a flush.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which a flush occurs even without a trailing
     *             newline. Must not be less than flushSize.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        /* When asserts are compiled out, fall back to flushSize as the maximum. */
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Flushes any buffered data when the object is destroyed. */
    ~this()
    {
        flush();
    }

    /** Writes the internal buffer to the output target and clears the buffer. */
    void flush()
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. This also keeps the trailing character check
     * in bounds when flushSize is zero and nothing has been buffered.
     */
    private bool maybeFlush()
    {
        auto buffered = _outputBuffer.data;
        if (buffered.length == 0) return false;

        immutable bool doFlush =
            buffered.length >= _flushSize &&
            (buffered[$-1] == '\n' || buffered.length >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* appendRaw adds to the internal buffer without any flush check. */
    private void appendRaw(T)(T stuff) pure
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer, then applies the maybeFlush check. */
    void append(T)(T stuff)
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     * Returns true if a flush occurred.
     */
    bool appendln()
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if flushSize has been
     * reached. Returns true if a flush occurred.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     *
     * Only the maxSize check is applied after the append.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range. */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            /* A lone newline character is a line boundary; other characters are
             * buffered without a flush check.
             */
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
585 
// BufferedOutputRange.
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.algorithm : joiner, map;
    import std.array : appender;
    import std.conv : to;
    import std.file : readText, rmdirRecurse;
    import std.path : buildPath;
    import std.range : iota;
    import std.stdio : File;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    /* Text each of the basic tests produces after its label. */
    enum sampleTail = "abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n";

    /* Writes a label followed by the sample text, mixing the append forms. Takes the
     * stream by ref so that returning from the helper does not trigger a flush.
     */
    void writeSample(R)(ref R ostream, string label)
    {
        ostream.append(label);
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        writeSample(ostream, "file1: ");
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and no flush at every line. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        writeSample(ostream, "file2: ");
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            auto ltw = filepath3.File("w").lockingTextWriter;
            {
                auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
                writeSample(ostream, "file3: ");
            }
        }
        assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
    }

    /* With an Appender. */
    auto app1 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1))(app1);
        writeSample(ostream, "appender1: ");
    }
    assert(app1.data == "appender1: " ~ sampleTail);

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+

        assert(app2.data == "");

        ostream.append("12345678"); // Eight chars, below the flush size.
        assert(app2.data == "");

        const bool flushedAtNine = ostream.appendln;  // Ninth char, not flushed yet.
        assert(!flushedAtNine);
        assert(app2.data == "");

        const bool flushedAtTen = ostream.appendln;   // Tenth char, now flushed.
        assert(flushedAtTen);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        ostream.append("12345678");

        const bool flushedWhenNotFull = ostream.flushIfFull;
        assert(!flushedWhenNotFull);
        assert(app2.data == "");

        ostream.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        /* Past flush size, but no newline and below max size: held until scope exit. */
        ostream.append("123456789012345");
        assert(app2.data == "");
    }
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
        ostream.append("appenderB: ");
        ostream.joinAppend(["a", "bc", "def"], '-');
        ostream.append(':');
        ostream.joinAppend(["g", "hi", "jkl"], '-');
        ostream.appendln("*100*");
        ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        ostream.append(' ');
        ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        ostream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    void outputStuffAsRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    void outputStuffAsVal(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    /* By ref: the first call stays below the flush size of 12, the second crosses it
     * at its first newline.
     */
    auto app3 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        outputStuffAsRef(ostream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        outputStuffAsRef(ostream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    /* By value: output is written each time the function returns. */
    auto app4 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        ostream.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        ostream.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        ostream.put("1234567");     // Not flushed yet (no newline).
        assert(app5.data == "123456789012");

        ostream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
794 
/**
isFlushableOutputRange is true when R is an output range for elements of type E
(char by default) and `r.flush` on an R returns void.
*/
template isFlushableOutputRange(R, E = char)
{
    static if (isOutputRange!(R, E))
    {
        /* Evaluates to false when R has no flush member, or when flush returns a value. */
        enum bool isFlushableOutputRange = is(ReturnType!((R r) => r.flush) == void);
    }
    else
    {
        enum bool isFlushableOutputRange = false;
    }
}
800 
@safe unittest
{
    import std.array;
    auto app = appender!(char[]);
    auto ostream = BufferedOutputRange!(typeof(app))(app, 5, 0, 10); // maxSize 10

    alias PlainAppender = typeof(app);
    alias Buffered = typeof(ostream);

    /* A plain Appender is an output range, but has no flush member. */
    static assert(isOutputRange!(PlainAppender, char));
    static assert(!isFlushableOutputRange!(PlainAppender, char));
    static assert(!isFlushableOutputRange!(PlainAppender));

    /* Wrapped in a BufferedOutputRange it is both an output range and flushable. */
    static assert(isOutputRange!(Buffered, char));
    static assert(isFlushableOutputRange!(Buffered, char));
    static assert(isFlushableOutputRange!(Buffered));

    /* The same checks with the types spelled out. */
    static assert(isOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string, string));
    static assert(!isFlushableOutputRange!(Appender!string));

    static assert(isOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[]), char));
    static assert(!isFlushableOutputRange!(Appender!(char[])));

    static assert(isOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[]))));
    static assert(isFlushableOutputRange!(BufferedOutputRange!(Appender!(char[])), char));
}
827 
828 
/**
bufferedByLine is a faster alternative to std.stdio.File.byLine. Rather than reading
one line at a time, it reads large blocks from the input stream into an internal
buffer and serves lines out of that buffer.

The file argument must be a File object open for reading, typically a filesystem file
or standard input. Use the Yes.keepTerminator template parameter to keep the newline
on each line. This mirrors stdio.File.byLine, except the choice is a template parameter
rather than a runtime parameter.

Template parameters:
$(LIST
    * keepTerminator - Whether returned lines include the trailing terminator.
    * Char - Element type of the returned lines, either char or ubyte.
    * terminator - The line terminator byte.
    * readSize - Number of bytes requested from the file per read.
    * growSize - Increment used when the buffer must grow. Must be in (0, readSize].
)

The slice returned by front refers to the internal buffer and is only valid until the
next call to popFront.

Because input is read in blocks, no data is available until a full block has been read
or end-of-file is reached. For this reason, bufferedByLine is not appropriate for
interactive input.
*/

auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer bookkeeping:
         *   - _buffer.length - Total size of the allocated buffer.
         *   - _dataEnd - One past the last valid byte (end of the most recent read).
         *   - _lineStart - Start of the current line.
         *   - _lineEnd - One past the end of the current line, terminator included.
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        this (File f)
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        /* The range is exhausted once the file hit end-of-file and every buffered
         * byte has been consumed.
         */
        bool empty() const pure
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        /* Returns the current line as a slice of the internal buffer. */
        Char[] front() pure
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                immutable sliceEnd = _lineEnd;
            }
            else
            {
                assert(_lineStart < _lineEnd);

                /* Drop the terminator if present. The last line of a file may lack one. */
                immutable sliceEnd = _lineEnd - (_buffer[_lineEnd - 1] == terminator ? 1 : 0);
            }

            return cast(Char[]) _buffer[_lineStart .. sliceEnd];
        }

        /* Searches _buffer[searchFrom .. _dataEnd] for the terminator and updates
         * _lineEnd. _lineEnd is set just past the terminator when one is found, and
         * to _dataEnd otherwise. Returns true if a terminator was found.
         *
         * Note: 'find' returns the slice starting at the byte searched for, or an
         * empty slice if the byte is not present.
         */
        private bool locateLineEnd(size_t searchFrom)
        {
            import std.algorithm : find;

            auto hit = _buffer[searchFrom .. _dataEnd].find(terminator);
            immutable hasTerminator = !hit.empty;
            _lineEnd = hasTerminator ? _dataEnd - hit.length + 1 : _dataEnd;
            return hasTerminator;
        }

        /* Note: popFront is called once at initialization to do the initial read. */
        void popFront()
        {
            import std.algorithm: copy;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Discard the current line. */
            _lineStart = _lineEnd;

            /* First look for the next line in the data already buffered. If it has a
             * terminator, or the file has nothing more to give, the work is done.
             */
            bool sawTerminator = locateLineEnd(_lineStart);
            if (sawTerminator || _file.eof) return;

            /* No terminator in the buffered data. Read from the file until one is
             * found or end-of-file is reached.
             */
            assert(_lineEnd == _dataEnd);

            if (_lineStart > 0)
            {
                /* Slide the partial line down to the start of the buffer. */
                immutable carryOver = _dataEnd - _lineStart;
                copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. carryOver]);
                _lineStart = 0;
                _lineEnd = _dataEnd = carryOver;
            }

            do
            {
                /* Ensure there is room for a full read, growing in growSize steps. */
                immutable freeSpace = _buffer.length - _dataEnd;
                if (freeSpace < readSize)
                {
                    size_t extra = growSize;
                    while (freeSpace + extra < readSize) extra += growSize;
                    _buffer.length += extra;
                }

                /* Read the next block, appending it to the valid data. */
                auto chunk = _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize]);
                _dataEnd += chunk.length;

                /* Only the newly read bytes need searching; _lineEnd is the old data end. */
                sawTerminator = locateLineEnd(_lineEnd);

            } while (!sawTerminator && !_file.eof);
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    auto lineRange = new BufferedByLineImpl(file);
    if (!lineRange.empty) lineRange.popFront;
    return lineRange;
}
958 
// BufferedByLine.
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     *
     * The data covers empty lines, one character lines, short numeric lines, and
     * lines of steadily increasing length (1 to 1000 characters).
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {

        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', 512, 256);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
    }


    /* Files that do not end in a newline. Three file pairs are used, holding one,
     * two, and three characters. Note that file1a/file1b are overwritten here; the
     * large data set written above is no longer needed.
     */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    {
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f2aIn = file2a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f2bIn = file2b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f3aIn = file3a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f3bIn = file3b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f2aIn = file2a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f2bIn = file2b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f2aIn, f2bIn, StoppingPolicy.requireSameLength)) assert(a == b);

            auto f3aIn = file3a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f3bIn = file3b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f3aIn, f3bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
    }
}
1083 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

The elements of inputRange are written to outputRange, with delimiter written between
consecutive elements. Nothing is written for an empty inputRange. Existing content of
outputRange is not cleared. The output range is passed by reference and is also
returned, which allows chaining a call such as `.data` on the result.

Two forms are supported: an input range of arrays of E (e.g. an array of strings
joined by a char), and an input range of E (e.g. an array of ints joined by an int).

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses of joinAppend have been replaced by BufferedOutputRange, which
has its own joinAppend method. However, joinAppend remains useful when constructing
internal buffers where BufferedOutputRange is not appropriate.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')).data);
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* Both element forms require an input range, hence the enclosing parentheses. */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E))
    ))
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1157 
// joinAppend.
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* Joining must not modify the caller's input array. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Output is appended to existing content. Check the array that was just joined. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays, joined by a single element. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of single elements. No delimiter is written between separate calls. */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1207 
/**
getTsvFieldValue returns the value of one field from a line of delimited text.

The line is split on the delimiter character, the field at the zero-based fieldIndex
is selected, and its text is converted to type T using std.conv.to.

This is a convenience function for cases needing only a single field from a line.
When several fields are needed it is more efficient to work with std.algorithm.splitter
directly or to use the InputFieldReordering class.

An exception is thrown if the line has too few fields or if the conversion fails. A
failed conversion surfaces as the std.conv.ConvException thrown by std.conv.to. The
"not enough fields" exception text uses 1-upped field numbers, matching the numbering
command line users provide.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step over the fields preceding the requested one, counting those seen. */
    size_t numSkipped = 0;
    for (; numSkipped < fieldIndex && !fields.empty; ++numSkipped)
    {
        fields.popFront;
    }

    /* Normal case: the requested field exists. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero was requested, but the split range is empty. This is a workaround
     * to a splitter special case - If the input is empty, the returned split range
     * is empty. This doesn't properly represent a single column file. More correct
     * mathematically, and for this case, would be a single value representing an
     * empty string. The input line is a convenient source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1270 
// getTsvFieldValue.
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. The same line is used as string, char[],
     * dchar[], and wchar[] input.
     */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1335 
/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
 *  A Yes value indicates the Unix newline was already removed from the line, as is
 *  done by std.stdio.File.byLine or a similar mechanism.
 */
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsNewlineOnUnix throws an exception if the line has a Windows/DOS line
ending and the build was compiled for a Unix (Posix) platform. The TSV Utilities use
this to detect Windows/DOS line endings and terminate processing with an error
message to the user.

With Yes.newlineWasRemoved (the default) the line is expected to have had its '\n'
stripped, so a trailing '\r' identifies a Windows line ending. With
No.newlineWasRemoved the line must end with "\r\n".

The filename and lineNum arguments are used only for the error message. A filename
of "-" is reported as "Standard Input". On non-Posix builds this function does nothing.
 */
void throwIfWindowsNewlineOnUnix
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    version(Posix)
    {
        /* The trailing characters that identify a Windows/DOS line ending. */
        static if (nlWasRemoved) enum string windowsTail = "\r";
        else enum string windowsTail = "\r\n";

        if (line.length < windowsTail.length) return;
        if (line[$ - windowsTail.length .. $] != windowsTail) return;

        import std.format;
        throw new Exception(
            format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
                   (filename == "-") ? "Standard Input" : filename, lineNum));
    }
}
1375 
// throwIfWindowsNewlineOnUnix
@safe unittest
{
    /* Note: Currently only building on Posix. Need to add non-Posix test cases
     * if Windows builds are ever done.
     */
    version(Posix)
    {
        import std.exception;

        /* Default mode (newline already removed): only a trailing '\r' throws. */
        foreach (i, text; ["", "a", "ab", "abc"])
        {
            assertNotThrown(throwIfWindowsNewlineOnUnix(text, "afile.tsv", i + 1));
        }
        foreach (i, text; ["\r", "a\r", "ab\r", "abc\r"])
        {
            assertThrown(throwIfWindowsNewlineOnUnix(text, "afile.tsv", i + 1));
        }

        /* Newline still present: only a trailing "\r\n" throws. */
        foreach (i, text; ["\n", "a\n", "ab\n", "abc\n"])
        {
            assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text, "afile.tsv", i + 1));
        }
        foreach (i, text; ["\r\n", "a\r\n", "ab\r\n", "abc\r\n"])
        {
            assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text, "afile.tsv", i + 5));
        }

        /* Standard Input formatting. collectExceptionMsg returns a null string when
         * nothing is thrown, which fails the endsWith check.
         */
        import std.algorithm : endsWith;

        immutable msgNewlineRemoved =
            collectExceptionMsg(throwIfWindowsNewlineOnUnix("\r", "-", 99));
        assert(msgNewlineRemoved.endsWith("File: Standard Input, Line: 99"));

        immutable msgNewlineKept =
            collectExceptionMsg(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99));
        assert(msgNewlineKept.endsWith("File: Standard Input, Line: 99"));
    }
}
1435 
/** Flag used by InputSourceRange to determine if the header line should be read when
opening a file.
*/
alias ReadHeader = Flag!"readHeader";

/**
inputSourceRange is a helper function for creating new InputSourceRange objects.

It constructs an InputSourceRange from the given filepaths and readHeader flag and
returns it.
*/
InputSourceRange inputSourceRange(string[] filepaths, ReadHeader readHeader)
{
    auto sources = new InputSourceRange(filepaths, readHeader);
    return sources;
}
1448 
/**
InputSourceRange is an input range that iterates over a set of input files.

InputSourceRange is used to iterate over a set of files passed on the command line.
Files are opened and closed automatically as the iteration advances. The caller can
choose to have header lines read automatically.

The range is created from a set of filepaths. Each filepath is mapped to an
InputSource object as it becomes the front element. This is what enables opening and
closing files and reading the header line automatically.

The motivation for an InputSourceRange is to provide a standard way to look at the
header line of the first input file during command line argument processing, and then
pass the open input file and the header line along to the main processing functions.
This enables features like named fields to be implemented in a standard way.

Both InputSourceRange and InputSource are reference objects. This keeps their use
limited to a single iteration over the set of files. The files can be iterated again
by creating a new InputSourceRange against the same filepaths.

Currently, InputSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.
 */
final class InputSourceRange
{
    import std.range;

    private string[] _filepaths;      // Paths not yet opened.
    private ReadHeader _readHeader;   // Passed to every InputSource created.
    private InputSource _front;       // Current source; null once the range is exhausted.

    /* The filepaths array is copied. The first file, if any, is opened immediately. */
    this(string[] filepaths, ReadHeader readHeader)
    {
        _filepaths = filepaths.dup;
        _readHeader = readHeader;
        advanceToNextSource();
    }

    /* Number of sources remaining, including the current front element. */
    size_t length() const pure nothrow @safe
    {
        if (empty) return 0;
        return _filepaths.length + 1;
    }

    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    InputSource front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty InputSourceRange");
        return _front;
    }

    /* Closes the current source, then opens the next one if there is one. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty InputSourceRange");

        _front.close;
        advanceToNextSource();
    }

    /* Makes the next pending filepath the front element, opening it. Sets the front
     * element to null if no filepaths remain.
     */
    private void advanceToNextSource()
    {
        if (_filepaths.empty)
        {
            _front = null;
            return;
        }

        _front = new InputSource(_filepaths.front, _readHeader);
        _front.open;
        _filepaths.popFront;
    }
}
1528 
/**
InputSource is the class of objects produced by iterating over an InputSourceRange.

An InputSource object provides access to the open file that is currently the front
element of an InputSourceRange. The main methods application code is likely to need
are:

$(LIST
    * `file()` - Returns the File object. The file will be open for reading as long
      as the InputSource instance is the front element of the InputSourceRange it
      came from.

    * `header(KeepTerminator keepTerminator = No.keepTerminator)` - Returns the
      header line from the file. An empty string is returned if the InputSourceRange
      was created with readHeader=false.

    * `name()` - The name of the input source. The name returned is intended for
      user error messages. For files, this is the filepath that was passed to
      InputSourceRange. For standard input, it is "Standard Input".
)

An InputSource is a reference object, so copies retain the state of the
InputSourceRange front element. In particular, all copies have the open state of
the front element of the InputSourceRange.

This class is not intended for use outside the context of an InputSourceRange.
*/
final class InputSource
{
    import std.range;
    import std.stdio;

    private immutable string _filepath;
    private immutable bool _isStdin;      // True when the filepath is "-".
    private bool _isOpen;                 // Set by open(), cleared by close().
    private ReadHeader _readHeader;
    private bool _hasBeenOpened;          // Set by open() and never cleared.
    private string _header;               // Header line as read, terminator included.
    private File _file;

    /* Records the filepath and header setting. Nothing is opened here; that is
     * done by open().
     */
    private this(string filepath, ReadHeader readHeader) pure nothrow @safe
    {
        _filepath = filepath;
        _readHeader = readHeader;
        _isStdin = (filepath == "-");
        _hasBeenOpened = false;
        _isOpen = false;
    }

    /** file returns the File object held by the InputSource.
     *
     * The File will be open for reading as long as the InputSource instance is the
     * front element of the InputSourceRange it came from.
     */
    File file() nothrow @safe
    {
        return _file;
    }

    /** isReadHeaderEnabled returns true if the header line is being read.
     */
    bool isReadHeaderEnabled() const pure nothrow @safe
    {
        return _readHeader == Yes.readHeader;
    }

    /** header returns the header line from the input file.
     *
     * With No.keepTerminator (the default), a single trailing '\n' is removed if
     * present. An empty string is returned if the InputSourceRange was created
     * with readHeader=false.
     */
    string header(KeepTerminator keepTerminator = No.keepTerminator) const pure nothrow @safe
    {
        assert(_hasBeenOpened);

        if (keepTerminator == Yes.keepTerminator) return _header;

        /* Only strip when there is a newline to strip. */
        if (_header.length == 0 || _header[$ - 1] != '\n') return _header;

        return _header[0 .. $ - 1];
    }

    /** isHeaderEmpty returns true if there is no data for a header, including the
     * terminator.
     *
     * When headers are being read, this is true only if the file is empty.
     */
    bool isHeaderEmpty() const pure nothrow @safe
    {
        assert(_hasBeenOpened);
        return _header.length == 0;
    }

    /** name returns a user friendly name representing the input source.
     *
     * For files, it is the filepath provided to InputSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * not name().)
     */
    string name() const pure nothrow @safe
    {
        if (_isStdin) return "Standard Input";
        return _filepath;
    }

    /** isStdin returns true if the input source is Standard Input, false otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the input source is open for reading, false otherwise.
     *
     * "Open" in this context is whether the InputSource object is currently open,
     * meaning that it is the front element of the InputSourceRange that created it.
     *
     * For files, this is also reflected in the state of the underlying File object.
     * However, standard input is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the input source and, when header reading is enabled, reads the first
     * line as the header. An InputSource may be opened only once.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        if (isStdin)
        {
            _file = stdin;
        }
        else
        {
            _file = File(_filepath, "rb");
        }

        if (_readHeader) _header = _file.readln;

        _hasBeenOpened = true;
        _isOpen = true;
    }

    /* Marks the InputSource as closed. The underlying File is closed too, unless
     * it is standard input.
     */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
1666 
// InputSourceRange and InputSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    /* Test fixtures, indexed by file number. File 0 is completely empty, file 1
     * has a header only, files 2 and 3 have a header and a body.
     */
    string[] fileHeaders = ["", "file 1 header\n", "file 2 header\n", "file 3 header\n"];
    string[] fileBodies = ["", "", "file 2 line 1\n", "file 3 line 1\nfile 3 line 2\n"];

    string[] inputFiles;
    string[] fileData;

    foreach (i, fileName; ["file0.txt", "file1.txt", "file2.txt", "file3.txt"])
    {
        inputFiles ~= buildPath(testDir, fileName);
        fileData ~= fileHeaders[i] ~ fileBodies[i];
        File(inputFiles[i], "w").write(fileData[i]);
    }

    auto readSources = appender!(InputSource[]);
    auto buffer = new char[1024];    // Must be large enough to hold the test files.

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    foreach (numFiles; 1 .. inputFiles.length + 1)
    {
        auto filesInUse = inputFiles[0 .. numFiles];

        /* Reading headers. */

        readSources.clear;
        auto inputSourcesYesHeader = inputSourceRange(filesInUse, Yes.readHeader);
        assert(inputSourcesYesHeader.length == numFiles);

        foreach (fileNum, source; inputSourcesYesHeader.enumerate)
        {
            readSources.put(source);

            /* Only the current source is open; all earlier ones have been closed. */
            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            string expectedHeader = fileHeaders[fileNum];
            string expectedHeaderNoTerminator =
                expectedHeader.length > 0 ? expectedHeader[0 .. $ - 1] : expectedHeader;

            assert(source.header(Yes.keepTerminator) == expectedHeader);
            assert(source.header(No.keepTerminator) == expectedHeaderNoTerminator);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(source.isReadHeaderEnabled);

            /* The header was consumed on open, so only the body remains. */
            assert(source.file.rawRead(buffer) == fileBodies[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesYesHeader.empty);

        /* Without reading headers. */

        readSources.clear;
        auto inputSourcesNoHeader = inputSourceRange(filesInUse, No.readHeader);
        assert(inputSourcesNoHeader.length == numFiles);

        foreach (fileNum, source; inputSourcesNoHeader.enumerate)
        {
            readSources.put(source);

            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            assert(source.header(Yes.keepTerminator).empty);
            assert(source.header(No.keepTerminator).empty);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(!source.isReadHeaderEnabled);

            /* Nothing was consumed on open, so the full file content remains. */
            assert(source.file.rawRead(buffer) == fileData[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesNoHeader.empty);
    }

    /* Tests with standard input. No actual reading in these tests.
     */

    readSources.clear;
    foreach (fileNum, source; inputSourceRange(["-", "-"], No.readHeader).enumerate)
    {
        readSources.put(source);
        assert(source.isOpen);
        assert(source.file.isOpen);
        assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));      // InputSource objects are "closed".
        assert(readSources.data[0 .. fileNum].all!(s => s.file.isOpen));  // Actual stdin should not be closed.
        assert(readSources.data[fileNum].isOpen);

        assert(source.header(Yes.keepTerminator).empty);
        assert(source.header(No.keepTerminator).empty);

        assert(source.name == "Standard Input");
        assert(source.isStdin);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = inputSourceRange(nofiles, No.readHeader);
        assert(sources.empty);
    }
    {
        auto sources = inputSourceRange(nofiles, Yes.readHeader);
        assert(sources.empty);
    }

    /* Error cases. */
    assertThrown(inputSourceRange([inputFiles[0], "no_such_file.txt"], No.readHeader).each);
    assertThrown(inputSourceRange(["no_such_file.txt", inputFiles[1]], Yes.readHeader).each);
}
1817 
/**
byLineSourceRange is a helper function for creating new ByLineSourceRange objects.

The template arguments are passed through unchanged to ByLineSourceRange, and the
filepaths are handed to its constructor.
*/
auto byLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
(string[] filepaths)
if (is(Char == char) || is(Char == ubyte))
{
    alias RangeType = ByLineSourceRange!(keepTerminator, Char, terminator);
    return new RangeType(filepaths);
}
1828 
/**
ByLineSourceRange is an input range that iterates over a set of input files. It
provides bufferedByLine access to each file.

A ByLineSourceRange is used to iterate over a set of files passed on the command line.
Files are automatically opened and closed during iteration. The front element of the
range provides access to a bufferedByLine for iterating over the lines in the file.

The range is created from a set of filepaths. These filepaths are mapped to
ByLineSource objects during the iteration. This is what enables automatically opening
and closing files and providing bufferedByLine access.

The motivation behind ByLineSourceRange is to provide a standard way to look at the
header line of the first input file during command line argument processing, and then
pass the open input file along to the main processing functions. This enables
features like named fields to be implemented in a standard way.

Access to the first line of the first file is available after creating the
ByLineSourceRange instance. The first file is opened and a bufferedByLine created.
The first line of the first file is available via byLine.front (after checking
!byLine.empty).

Both ByLineSourceRange and ByLineSource are reference objects. This keeps their use
limited to a single iteration over the set of files. The files can be iterated again
by creating a new ByLineSourceRange against the same filepaths.

Currently, ByLineSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.
 */
final class ByLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.range;

    /* Forward the Char template parameter rather than a hard-coded char, so that
     * the ByLineSource instantiation matches what the caller asked for.
     */
    alias ByLineSourceType = ByLineSource!(keepTerminator, Char, terminator);

    private string[] _filepaths;      // Filepaths not yet opened.
    private ByLineSourceType _front;  // Current source; null when the range is empty.

    /** Creates the range from a list of filepaths. The list is copied. If it is
     * non-empty, the first file is opened immediately and becomes the front element.
     */
    this(string[] filepaths)
    {
        _filepaths = filepaths.dup;
        _front = null;

        if (!_filepaths.empty)
        {
            _front = new ByLineSourceType(_filepaths.front);
            _front.open;
            _filepaths.popFront;
        }
    }

    /** length returns the number of input sources remaining, counting the current
     * front element.
     */
    size_t length() const pure nothrow @safe
    {
        return empty ? 0 : _filepaths.length + 1;
    }

    /** empty is true when there is no current input source. */
    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    /** front returns the ByLineSource for the current input file. */
    ByLineSourceType front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty ByLineSourceRange");
        return _front;
    }

    /** popFront closes the current input source and opens the next one, if any.
     * The range becomes empty when no filepaths remain.
     */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty ByLineSourceRange");

        _front.close;

        if (!_filepaths.empty)
        {
            _front = new ByLineSourceType(_filepaths.front);
            _front.open;
            _filepaths.popFront;
        }
        else
        {
            _front = null;
        }
    }
}
1915 
/**
ByLineSource is a class of objects produced by iterating over a ByLineSourceRange.

A ByLineSource instance provides a bufferedByLine range for the current front
element of a ByLineSourceRange. The main methods application code is likely to
need are:

$(LIST
    * `byLine()` - Returns the bufferedByLine range accessing the open file. The file
       will be open for reading (using the bufferedByLine range) as long as the
       ByLineSource instance is the front element of the ByLineSourceRange
       it came from.

    * `name()` - The name of the input source. The name returned is intended for
      user error messages. For files, this is the filepath that was passed to
      ByLineSourceRange. For standard input, it is "Standard Input".
)

A ByLineSource is a reference object, so the copies have the same state as the
ByLineSourceRange front element. In particular, all copies will have the open
state of the front element of the ByLineSourceRange.

This class is not intended for use outside the context of a ByLineSourceRange.
*/
final class ByLineSource(
    KeepTerminator keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.range;
    import std.stdio;
    import std.traits : ReturnType;

    /* Pass the Char template parameter through to bufferedByLine rather than a
     * hard-coded char, so the requested character type is honored.
     */
    alias newByLineFn = bufferedByLine!(keepTerminator, Char, terminator);
    alias ByLineType = ReturnType!newByLineFn;

    private immutable string _filepath;
    private immutable bool _isStdin;    // True when the filepath is "-".
    private bool _isOpen;               // Set by open(), cleared by close().
    private bool _hasBeenOpened;        // Set by open() and never cleared.
    private File _file;
    private ByLineType _byLineRange;

    /* Records the filepath. Nothing is opened here; that is done by open(). */
    private this(string filepath) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = filepath == "-";
        _isOpen = false;
        _hasBeenOpened = false;
    }

    /** byLine returns the bufferedByLine object held by the ByLineSource instance.
     *
     * The File underlying the BufferedByLine object is open for reading as long as
     * the ByLineSource instance is the front element of the ByLineSourceRange it
     * came from.
     */
    ByLineType byLine() nothrow @safe
    {
        return _byLineRange;
    }

    /** name returns a user friendly name representing the underlying input source.
     *
     * For files, it is the filepath provided to ByLineSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * rather than comparing against name().)
     */
    string name() const pure nothrow @safe
    {
        return _isStdin ? "Standard Input" : _filepath;
    }

    /** isStdin returns true if the underlying input source is Standard Input, false
     * otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the ByLineSource instance is open for reading, false
     * otherwise.
     *
     * "Open" in this context is whether the ByLineSource object is currently "open".
     * The underlying input source backing it does not necessarily have the same
     * state. The ByLineSource instance is "open" if it is the front element of the
     * ByLineSourceRange that created it.
     *
     * The underlying input source object follows the same open/close state where
     * that makes sense. In particular, real files are closed when the ByLineSource
     * object is closed. The exception is standard input, which is never actually
     * closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the input source and creates the bufferedByLine range over it. A
     * ByLineSource may be opened only once.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        _file = isStdin ? stdin : _filepath.File("rb");
        _byLineRange = newByLineFn(_file);
        _isOpen = true;
        _hasBeenOpened = true;
    }

    /* Marks the ByLineSource as closed. The underlying File is closed too, unless
     * it is standard input.
     */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
2030 
// ByLineSourceRange and ByLineSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_byline_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    string file0 = buildPath(testDir, "file0.txt");
    string file1 = buildPath(testDir, "file1.txt");
    string file2 = buildPath(testDir, "file2.txt");
    string file3 = buildPath(testDir, "file3.txt");

    /* File 0 is completely empty, file 1 has a header only, files 2 and 3 have a
     * header and a body.
     */
    string file0Header = "";
    string file1Header = "file 1 header\n";
    string file2Header = "file 2 header\n";
    string file3Header = "file 3 header\n";

    string file0Body = "";
    string file1Body = "";
    string file2Body = "file 2 line 1\n";
    string file3Body = "file 3 line 1\nfile 3 line 2\n";

    string file0Data = file0Header ~ file0Body;
    string file1Data = file1Header ~ file1Body;
    string file2Data = file2Header ~ file2Body;
    string file3Data = file3Header ~ file3Body;

    {
        file0.File("w").write(file0Data);
        file1.File("w").write(file1Data);
        file2.File("w").write(file2Data);
        file3.File("w").write(file3Data);
    }

    auto inputFiles = [file0, file1, file2, file3];
    auto fileHeaders = [file0Header, file1Header, file2Header, file3Header];
    auto fileData = [file0Data, file1Data, file2Data, file3Data];

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    auto readSourcesNoTerminator = appender!(ByLineSource!(No.keepTerminator)[]);
    auto readSourcesYesTerminator = appender!(ByLineSource!(Yes.keepTerminator)[]);

    foreach(numFiles; 1 .. inputFiles.length + 1)
    {
        /* Using No.keepTerminator. */
        readSourcesNoTerminator.clear;
        auto inputSourcesNoTerminator = byLineSourceRange!(No.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesNoTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesNoTerminator.enumerate)
        {
            readSourcesNoTerminator.put(source);

            /* Only the current source is open; all earlier ones have been closed. */
            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesNoTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesNoTerminator.data[fileNum].isOpen);

            auto headerNoTerminatorLength = fileHeaders[fileNum].length;
            if (headerNoTerminatorLength > 0) --headerNoTerminatorLength;

            /* The first line, when there is one, is the header without its newline. */
            assert(source.byLine.empty ||
                   source.byLine.front == fileHeaders[fileNum][0 .. headerNoTerminatorLength]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Terminators were stripped, so add them back to rebuild the file content. */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
                readFileData.put('\n');
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesNoTerminator.empty);

        /* Using Yes.keepTerminator. */
        readSourcesYesTerminator.clear;
        auto inputSourcesYesTerminator = byLineSourceRange!(Yes.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesYesTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesYesTerminator.enumerate)
        {
            readSourcesYesTerminator.put(source);
            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesYesTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesYesTerminator.data[fileNum].isOpen);

            assert(source.byLine.empty || source.byLine.front == fileHeaders[fileNum]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Terminators are kept, so the lines concatenate to the file content. */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesYesTerminator.empty);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = byLineSourceRange!(No.keepTerminator)(nofiles);
        assert(sources.empty);
    }
    {
        auto sources = byLineSourceRange!(Yes.keepTerminator)(nofiles);
        assert(sources.empty);
    }

    /* Error cases. */
    assertThrown(byLineSourceRange!(No.keepTerminator)([file0, "no_such_file.txt"]).each);
    assertThrown(byLineSourceRange!(Yes.keepTerminator)(["no_such_file.txt", file1]).each);
}