tsv_utils.common.utils source code

1 /**
2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
3 and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
8       an input line. Fields in the subset are accessed by array indices. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [bufferedByLine] - An input range that reads from a File handle line by line.
16       It is similar to the standard library method std.stdio.File.byLine, but quite a
17       bit faster. This is achieved by reading in larger blocks and buffering.
18 
19     * [InputSourceRange] - An input range that provides open file access to a set of
20       files. It is used to iterate over files passed as command line arguments. This
21       enables reading the header line of a file during command line argument processing,
22       then passing the open file to the main processing functions.
23 
24     * [ByLineSourceRange] - Similar to an InputSourceRange, except that it provides
25       access to a byLine iterator (bufferedByLine) rather than an open file. This is
26       used by tools that run the same processing logic on both header and non-header lines.
27 
28     * [joinAppend] - A function that performs a join, but appending the join output to
29       an output stream. It is a performance improvement over using join or joiner with
30       writeln.
31 
32     * [getTsvFieldValue] - A convenience function when only a single value is needed from
33       an input line.
34 
35     * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
36       parsing field-lists entered on the command line.
37 
38     * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds that detects
39       Windows newlines in input.
40 )
41 
42 Copyright (c) 2015-2020, eBay Inc.
43 Initially written by Jon Degenhardt
44 
45 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
46 */
47 
48 module tsv_utils.common.utils;
49 
50 import std.range;
51 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
52 import std.typecons : Flag, No, Yes;
53 
54 // InputFieldReordering class.
55 
56 /** Flag used by the InputFieldReordering template. */
57 alias EnablePartialLines = Flag!"enablePartialLines";
58 
59 /**
60 InputFieldReordering - Move select fields from an input line to an output array,
61 reordering along the way.
62 
63 The InputFieldReordering class is used to reorder a subset of fields from an input line.
64 The caller instantiates an InputFieldReordering object at the start of input processing.
65 The instance contains a mapping from input index to output index, plus a buffer holding
66 the reordered fields. The caller processes each input line by calling initNewLine,
67 splitting the line into fields, and calling processNextField on each field. The output
68 buffer is ready when the allFieldsFilled method returns true.
69 
70 Fields are not copied, instead the output buffer points to the fields passed by the caller.
71 The caller needs to use or copy the output buffer while the fields are still valid, which
72 is normally until reading the next input line. The program below illustrates the basic use
73 case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
74 below, which has a performance improvement over join used here.)
75 
76 ---
77 int main(string[] args)
78 {
79     import tsv_utils.common.utils;
80     import std.algorithm, std.array, std.range, std.stdio;
81     size_t[] fieldIndicies = [3, 0, 2];
82     auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
83     foreach (line; stdin.byLine)
84     {
85         fieldReordering.initNewLine;
86         foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
87         {
88             fieldReordering.processNextField(fieldIndex, fieldValue);
89             if (fieldReordering.allFieldsFilled) break;
90         }
91         if (fieldReordering.allFieldsFilled)
92         {
93             writeln(fieldReordering.outputFields.join('\t'));
94         }
95         else
96         {
97             writeln("Error: Insufficient number of field on the line.");
98         }
99     }
100     return 0;
101 }
102 ---
103 
104 Field indices are zero-based. An individual field can be listed multiple times. The
105 outputFields array is not valid until all the specified fields have been processed. The
106 allFieldsFilled method tests this. If a line does not have enough fields the outputFields
107 buffer cannot be used. For most TSV applications this is okay, as it means the line is
108 invalid and cannot be used. However, if partial lines are okay, the template can be
109 instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
110 are empty strings in the outputFields return.
111 */
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;
    private TupleFromTo[] fromToMap;
    private TupleFromTo[] mapStack;

    /** Builds the input-index to output-index map.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in the desired
     *                        output order. An index may appear more than once.
     *   start = Output position assigned to the first listed field. Subsequent fields
     *           are assigned consecutive positions.
     *
     * The output buffer holds `start + inputFieldIndicies.length` entries so that every
     * assigned output position is a valid index. Positions below `start` are never
     * written by this class.
     * NOTE(review): the intended use of a non-zero `start` is not visible in this file;
     * confirm that reserving the leading slots matches caller expectations.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output positions run from start to start + length - 1. Sizing the buffer by
         * length alone would make processNextField index past the end when start > 0.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Sorting by (from, to) lets a single in-order pass over the input fields
         * consume the map front-to-back.
         */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line.
     *
     * Resets the pending-mapping slice to the full map. When partial lines are
     * enabled, every output slot is also reset to an empty slice so that unfilled
     * fields read as empty.
     */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            foreach (ref field; outputFieldsBuf) field.length = 0;
        }
    }

    /** processNextField maps an input field to the correct locations in the
     * outputFields array.
     *
     * processNextField should be called once for each field on the line, in the order
     * found. The processing of the line can terminate once allFieldsFilled returns
     * true.
     *
     * The return value is the number of output fields the input field maps to. Zero
     * means the field is not mapped to the output fields array.
     *
     * If, prior to allFieldsFilled returning true, any fields on the input line
     * are not passed to processNextField, the caller should either ensure the fields
     * are not part of the output fields or have partial lines enabled.
     *
     * The field value is stored by reference (slice), not copied.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;

        /* The map is sorted by input index, so all entries for this field are at the
         * front of the remaining slice.
         */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all expected fields have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     *
     * The returned slice refers to the internal buffer; its entries are overwritten
     * while processing subsequent lines.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
202 
203 // InputFieldReordering - Tests using different character types.
@safe unittest
{
    import std.conv : to;

    /* Three input rows of four fields each; the second row contains non-ASCII text. */
    auto sourceRows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    /* Select input field 2 followed by input field 0. */
    size_t[] selection = [2, 0];

    auto wantedRows = [["r1f2",   "r1f0"],
                       ["ÀBCßßZ", "r2f0"],
                       ["456",    "r3f0"]];

    /* Conversion to dstring rows is exercised as well; the result is not otherwise used. */
    dstring[][] wantedAsDstring = to!(dstring[][])(wantedRows);

    /* Runs the reordering for one character type and checks both the fill state
     * after each field and the final output of each row.
     */
    static void checkCharType(Ch)(string[][] rows, size_t[] fields, Ch[][][] wanted)
    {
        auto reorderer = new InputFieldReordering!Ch(fields);

        foreach (rowIndex, row; rows)
        {
            reorderer.initNewLine;

            foreach (fieldIndex, fieldValue; row)
            {
                reorderer.processNextField(fieldIndex, to!(Ch[])(fieldValue));

                /* Field 2 is the highest selected index, so filling completes there. */
                assert ((fieldIndex >= 2) == reorderer.allFieldsFilled);
            }

            assert(reorderer.allFieldsFilled);
            assert(reorderer.outputFields == wanted[rowIndex]);
        }
    }

    checkCharType!char(sourceRows, selection, to!(char[][][])(wantedRows));
    checkCharType!wchar(sourceRows, selection, to!(wchar[][][])(wantedRows));
    checkCharType!dchar(sourceRows, selection, to!(dchar[][][])(wantedRows));
}
252 
253 // InputFieldReordering - Test of partial line support.
@safe unittest
{
    import std.conv : to;

    auto sourceRows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] selection = [2, 0];

    /* Snapshot of the output buffer expected after each field of each row has been
     * processed. Slots not yet filled are empty because partial lines are enabled.
     */
    auto snapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ];

    char[][][][] wantedSnapshots = to!(char[][][][])(snapshots);

    auto reorderer = new InputFieldReordering!(char, EnablePartialLines.yes)(selection);

    foreach (rowIndex, row; sourceRows)
    {
        auto wantedForRow = wantedSnapshots[rowIndex];

        reorderer.initNewLine;
        foreach (fieldIndex, fieldValue; row)
        {
            reorderer.processNextField(fieldIndex, to!(char[])(fieldValue));
            assert(reorderer.outputFields == wantedForRow[fieldIndex]);
        }
    }
}
286 
287 // InputFieldReordering - Field combination tests.
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* One entry per field-list under test: the selected input field indices and the
     * output expected for each of the three input lines.
     */
    static struct Combination
    {
        size_t[] fields;
        string[][] expected;
    }

    auto combinations =
        [
            Combination([0],
                        [["00"],
                         ["10"],
                         ["20"]]),
            Combination([3],
                        [["03"],
                         ["13"],
                         ["23"]]),
            Combination([0, 1],
                        [["00", "01"],
                         ["10", "11"],
                         ["20", "21"]]),
            Combination([1, 0],
                        [["01", "00"],
                         ["11", "10"],
                         ["21", "20"]]),
            Combination([0, 3],
                        [["00", "03"],
                         ["10", "13"],
                         ["20", "23"]]),
            Combination([3, 0],
                        [["03", "00"],
                         ["13", "10"],
                         ["23", "20"]]),
            Combination([0, 1, 2, 3],
                        [["00", "01", "02", "03"],
                         ["10", "11", "12", "13"],
                         ["20", "21", "22", "23"]]),
            Combination([3, 2, 1, 0],
                        [["03", "02", "01", "00"],
                         ["13", "12", "11", "10"],
                         ["23", "22", "21", "20"]]),
            Combination([0, 3, 0, 0, 1],
                        [["00", "03", "00", "00", "01"],
                         ["10", "13", "10", "10", "11"],
                         ["20", "23", "20", "20", "21"]]),
        ];

    foreach (combo; combinations)
    {
        /* The constructor takes its argument by ref, so pass a named local. */
        size_t[] fields = combo.fields;
        auto expected = to!(char[][][])(combo.expected);
        auto ifr = new InputFieldReordering!char(fields);

        foreach (lineIndex, line; inputLines)
        {
            ifr.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                ifr.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            /* Every input line has four fields, enough for all combinations above. */
            assert(ifr.allFieldsFilled);
            assert(ifr.outputFields == expected[lineIndex]);
        }
    }
}
389 
390 
391 import std.stdio : File, isFileHandle, KeepTerminator;
392 import std.range : isOutputRange;
393 import std.traits : Unqual;
394 
395 /**
396 BufferedOutputRange is a performance enhancement over writing directly to an output
397 stream. It holds a File open for write or an OutputRange. Output is accumulated in an
398 internal buffer and written to the output stream as a block.
399 
400 Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
401 than writing to stdout directly. This is especially noticeable for outputs with short
402 lines, as it blocks many writes together in a single write.
403 
404 The internal buffer is written to the output stream after flushSize has been reached.
405 This is checked at newline boundaries, when appendln is called or when put is called
406 with a single newline character. Other writes check maxSize, which is used to avoid
407 runaway buffers.
408 
409 
410 BufferedOutputRange has a put method allowing it to be used as a range. It has a number
411 of other methods providing additional control.
412 
413 $(LIST
414     * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
415       output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.
416 
417     * `append(stuff)` - Append to the internal buffer.
418 
419     * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
420       is flushed to the output stream if it has reached flushSize.
421 
422     * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
423       output stream if it has reached flushSize.
424 
425     * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
426       For reasons that are not clear, joiner is quite slow.
427 
428     * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
429       reached.
430 
431     * `flush()` - Write the internal buffer to the output stream.
432 
433     * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
434       newline character, '\n' or "\n".
435 )
436 
437 The internal buffer is automatically flushed when the BufferedOutputRange goes out of
438 scope.
439 */
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Constructor.
     *
     * Params:
     *   outputTarget = The File or output range that receives flushed data.
     *   flushSize = Buffer length at which newline-terminated writes trigger a flush.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer length at which a flush occurs regardless of newlines. Must
     *             not be smaller than flushSize; if it is (and asserts are disabled),
     *             flushSize is used instead.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Flushes any buffered data when the range goes out of scope. */
    ~this()
    {
        flush();
    }

    /** Writes the internal buffer to the output target and empties the buffer. */
    void flush()
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /** flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. This also protects the trailing-character
     * check: with a flushSize of zero the length test passes on an empty buffer, and
     * indexing its last element would be a range violation.
     *
     * Returns true if a flush occurred.
     */
    private bool maybeFlush()
    {
        immutable size_t bufferedLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferedLength > 0 &&
            bufferedLength >= _flushSize &&
            (_outputBuffer.data[bufferedLength - 1] == '\n' || bufferedLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* Appends to the internal buffer without any flush checks. */
    private void appendRaw(T)(T stuff) pure
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer. The buffer is flushed if it then ends with a
     * newline and has reached flushSize, or if it has reached maxSize.
     */
    void append(T)(T stuff)
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     * Returns true if a flush occurred.
     */
    bool appendln()
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends `stuff` followed by a newline, then flushes if flushSize has been
     * reached. Returns true if a flush occurred.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /** joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     *
     * Only the maxSize check is applied after the elements have been appended.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /** Makes this an output range. A single newline character or the string "\n"
     * acts as appendln(). Other single characters are buffered without flush checks;
     * everything else goes through append.
     */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
586 
587 // BufferedOutputRange.
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.algorithm : map, joiner;
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;
    import std.range : iota;
    import std.stdio : File;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    /* Writes the sample content shared by the basic tests: a label, several appended
     * pieces, a line ending, then the digits 0-9 joined by spaces and a final newline.
     */
    static void writeSample(Stream)(ref Stream stream, string label)
    {
        stream.append(label);
        stream.append("abc");
        stream.append(["def", "ghi", "jkl"]);
        stream.appendln(100.to!string);
        stream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        stream.appendln();
    }

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        auto fileStream1 = BufferedOutputRange!File(filepath1.File("w"));
        writeSample(fileStream1, "file1: ");
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and no flush at every line. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        auto fileStream2 = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        writeSample(fileStream2, "file2: ");
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            auto lockedWriter = filepath3.File("w").lockingTextWriter;
            {
                auto writerStream = BufferedOutputRange!(typeof(lockedWriter))(lockedWriter);
                writeSample(writerStream, "file3: ");
            }
        }
        assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
    }

    /* With an Appender. */
    auto app1 = appender!(char[]);
    {
        auto appStream = BufferedOutputRange!(typeof(app1))(app1);
        writeSample(appStream, "appender1: ");
    }
    assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto boundaryStream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
        bool didFlush = false;

        assert(app2.data == "");

        boundaryStream.append("12345678"); // Eight chars, below the flush size.
        assert(app2.data == "");

        didFlush = boundaryStream.appendln;  // Ninth char, still below the flush size.
        assert(!didFlush);
        assert(app2.data == "");

        didFlush = boundaryStream.appendln;  // Tenth char, flush size reached.
        assert(didFlush);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        boundaryStream.append("12345678");

        didFlush = boundaryStream.flushIfFull;  // Only eight chars buffered.
        assert(!didFlush);
        assert(app2.data == "");

        boundaryStream.flush;  // Explicit flush writes regardless of size.
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        /* Past the flush size, but no trailing newline and below max size. */
        boundaryStream.append("123456789012345");
        assert(app2.data == "");
    }
    /* Scope exit flushed the remaining data. */
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto joinStream = BufferedOutputRange!(typeof(app1b))(app1b);
        joinStream.append("appenderB: ");
        joinStream.joinAppend(["a", "bc", "def"], '-');
        joinStream.append(':');
        joinStream.joinAppend(["g", "hi", "jkl"], '-');
        joinStream.appendln("*100*");
        joinStream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        joinStream.append(' ');
        joinStream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        joinStream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    void emitByRef(R)(ref R sink)
    if (isOutputRange!(R, char))
    {
        sink.put('1');
        put(sink, "23");
        sink.put('\n');
        sink.put(["5", "67"]);
        put(sink, iota(8, 10).map!(x => x.to!string));
        put(sink, "\n");
    }

    void emitByVal(R)(R sink)
    if (isOutputRange!(R, char))
    {
        put(sink, '1');
        sink.put("23");
        put(sink, '\n');
        put(sink, ["5", "67"]);
        sink.put(iota(8, 10).map!(x => x.to!string));
        sink.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto refStream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        emitByRef(refStream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        emitByRef(refStream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto valStream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        emitByVal(valStream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        emitByVal(valStream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto cappedStream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        cappedStream.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        cappedStream.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        cappedStream.put("1234567");     // Not flushed yet (no newline).
        assert(app5.data == "123456789012");

        cappedStream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        cappedStream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        cappedStream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
795 
/**
bufferedByLine is a faster alternative to std.stdio.File.byLine. Rather than reading
one line at a time, it reads large blocks from the input stream into an internal
buffer and returns each line as a slice of that buffer.

The file argument must be a File object open for reading, typically a filesystem file
or standard input. Use the Yes.keepTerminator template parameter to keep the line
terminator on each returned line. This mirrors stdio.File.byLine, except that the
choice is made with a template parameter rather than a runtime parameter.

Because input is read in blocks, no line is produced until a full block has been read
or end-of-file is reached. For this reason, bufferedByLine is not appropriate for
interactive input.

Template parameters:
$(LIST
    * keepTerminator - Yes.keepTerminator to include the terminator in each line.
    * Char - Element type of the returned lines, either char or ubyte.
    * terminator - The byte that ends a line.
    * readSize - Number of bytes requested from the file on each read.
    * growSize - Increment used when the buffer must be enlarged. Must be in the
      range (0, readSize].
)

Note: The slice returned by front refers to the internal buffer. The buffer contents
may be moved or overwritten by the next call to popFront.
*/

auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
                    ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
    (File file)
if (is(Char == char) || is(Char == ubyte))
{
    static assert(0 < growSize && growSize <= readSize);

    static final class BufferedByLineImpl
    {
        /* Buffer bookkeeping:
         *   - _buffer.length - Total size of the allocated buffer.
         *   - _dataEnd - One past the last valid byte (end of the most recent read).
         *   - _lineStart - Index of the first byte of the current line.
         *   - _lineEnd - One past the last byte of the current line (terminator included).
         */
        private File _file;
        private ubyte[] _buffer;
        private size_t _lineStart = 0;
        private size_t _lineEnd = 0;
        private size_t _dataEnd = 0;

        this (File f)
        {
            _file = f;
            _buffer = new ubyte[readSize + growSize];
        }

        /* The range is exhausted once the file is at EOF and all buffered data has
         * been consumed.
         */
        bool empty() const pure
        {
            return _file.eof && _lineStart == _dataEnd;
        }

        Char[] front() pure
        {
            assert(!empty, "Attempt to take the front of an empty bufferedByLine.");

            static if (keepTerminator == Yes.keepTerminator)
            {
                return cast(Char[]) _buffer[_lineStart .. _lineEnd];
            }
            else
            {
                assert(_lineStart < _lineEnd);

                /* The last line of a file may lack a terminator, so check before dropping. */
                size_t sliceEnd = _lineEnd;
                if (_buffer[sliceEnd - 1] == terminator) --sliceEnd;
                return cast(Char[]) _buffer[_lineStart .. sliceEnd];
            }
        }

        /* Searches _buffer[searchStart .. _dataEnd] for the terminator and updates
         * _lineEnd. When a terminator is found, _lineEnd is set to one past it and
         * true is returned. Otherwise _lineEnd is set to _dataEnd and false is
         * returned.
         */
        private bool scanForTerminator(size_t searchStart)
        {
            import std.algorithm : find;

            /* 'find' returns the slice starting at the match, or an empty slice. */
            auto tail = _buffer[searchStart .. _dataEnd].find(terminator);
            immutable terminatorFound = tail.length > 0;
            _lineEnd = terminatorFound ? _dataEnd - tail.length + 1 : _dataEnd;
            return terminatorFound;
        }

        /* Note: Call popFront at initialization to do the initial read. */
        void popFront()
        {
            import std.algorithm : copy;
            assert(!empty, "Attempt to popFront an empty bufferedByLine.");

            /* Discard the current line, then look for the next one in the data
             * already buffered.
             */
            _lineStart = _lineEnd;
            bool terminatorFound = scanForTerminator(_lineStart);

            /* Done if a full line is buffered or the file has nothing more to give. */
            if (terminatorFound || _file.eof) return;

            /* No terminator in the buffered data. Read from the file until one is
             * found or end-of-file is reached.
             */
            assert(_lineEnd == _dataEnd);

            if (_lineStart > 0)
            {
                /* Slide the partial line to the front of the buffer to make room. */
                immutable carryOver = _dataEnd - _lineStart;
                copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. carryOver]);
                _lineStart = 0;
                _dataEnd = carryOver;
                _lineEnd = carryOver;
            }

            do
            {
                /* Ensure there is room for a full read, growing the buffer by the
                 * smallest multiple of growSize that suffices.
                 */
                immutable spaceLeft = _buffer.length - _dataEnd;
                if (spaceLeft < readSize)
                {
                    immutable shortfall = readSize - spaceLeft;
                    immutable growUnits = (shortfall + growSize - 1) / growSize;
                    _buffer.length += growUnits * growSize;
                }

                /* Read the next block. */
                auto justRead = _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize]);
                _dataEnd += justRead.length;

                /* Only the newly read bytes need searching; _lineEnd marks where
                 * the previous search stopped.
                 */
                terminatorFound = scanForTerminator(_lineEnd);
            }
            while (!terminatorFound && !_file.eof);
        }
    }

    assert(file.isOpen, "bufferedByLine passed a closed file.");

    auto lineRange = new BufferedByLineImpl(file);
    if (!lineRange.empty) lineRange.popFront;   // Prime the range with the first line.
    return lineRange;
}
925 
// BufferedByLine.
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    /* Reads pathA with bufferedByLine and pathB with File.byLine, asserting that both
     * produce the same sequence of lines. The two files are expected to hold identical
     * data. Separate files are used so each reader has its own file handle.
     */
    static void assertMatchesByLine(KeepTerminator keep, size_t readSize, size_t growSize)
        (string pathA, string pathB)
    {
        auto bufferedIn = pathA.File().bufferedByLine!(keep, char, '\n', readSize, growSize);
        auto byLineIn = pathB.File().byLine(keep);
        foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {
        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters. Written out explicitly so the template defaults are used. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    assertMatchesByLine!(No.keepTerminator, 512, 256)(file1a, file1b);

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file1a, file1b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file1a, file1b);
        }}
    }

    /* Files that do not end in a newline. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    {
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file1a, file1b);
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file2a, file2b);
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file3a, file3b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file1a, file1b);
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file2a, file2b);
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file3a, file3b);
        }}
    }
}
1050 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses of joinAppend have been replaced by BufferedOutputRange, which
has its own joinAppend method. However, joinAppend remains useful when constructing
internal buffers where BufferedOutputRange is not appropriate.

Params:
    inputRange = The elements to join. Each element is either a single E or an array
                 of E.
    outputRange = The output range receiving the joined elements. It is appended to,
                  existing content is not cleared.
    delimiter = The value written between consecutive elements.

Returns: The output range (a copy of outputRange), allowing calls to be chained. An
empty inputRange writes nothing.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, '\t').data);
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
    )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1124 
// joinAppend.
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* Joining mutable char arrays. The input must be left unchanged. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Joining strings appends to the existing content. The input must be left unchanged. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays: the delimiter separates the arrays. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of single elements: the delimiter separates the elements. Successive calls
     * accumulate in the appender.
     */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1174 
/**
getTsvFieldValue returns the value of one field of a delimited text line, converted
to type T.

It is a convenience function for cases where just one field of an input line is
needed. When several values are needed from the same line it is more efficient to use
std.algorithm.splitter directly, or the InputFieldReordering class.

Params:
    line = The text to split into fields.
    fieldIndex = Zero-based index of the field to extract.
    delim = The character separating fields.

Returns: The requested field, converted to T with std.conv.to.

Throws: std.conv.ConvException if the field text cannot be converted to T. An
Exception if the line has too few fields. The message for the latter uses 1-upped
field numbers, matching what a command line user would have entered.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step past the fields preceding the one requested, stopping early if the line
     * runs out of fields.
     */
    size_t fieldsSkipped = 0;
    for (; fieldsSkipped < fieldIndex && !fields.empty; ++fieldsSkipped)
    {
        fields.popFront;
    }

    /* Normal case: the requested field exists. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, fieldsSkipped));
    }

    /* Workaround for a splitter special case - splitting an empty input yields an
     * empty range rather than a range holding one empty string. The latter is the
     * better representation of a single column file with an empty value. The input
     * line itself is a convenient source of an empty value to convert. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1237 
// getTsvFieldValue.
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Verifies every field of the three-field sample line used for the data type
     * tests. Works for any character type accepted by getTsvFieldValue.
     */
    static void checkSampleLine(Line)(Line sampleLine)
    {
        assert(getTsvFieldValue!string(sampleLine, 0, '\t') == "orange and black");
        assert(getTsvFieldValue!string(sampleLine, 1, '\t') == "ნარინჯისფერი და შავი");
        assert(getTsvFieldValue!wstring(sampleLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
        assert(getTsvFieldValue!double(sampleLine, 2, '\t') == 88.5);
    }

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);

    string threeFields = "紅\t红\t99";
    assert(getTsvFieldValue!int(threeFields, 2, '\t') == 99);
    assert(getTsvFieldValue!string(threeFields, 2, '\t') == "99");
    assert(getTsvFieldValue!string(threeFields, 1, '\t') == "红");
    assert(getTsvFieldValue!string(threeFields, 0, '\t') == "紅");

    string fourFields = "红色和绿色\tred and green\t赤と緑\t10.5";
    assert(getTsvFieldValue!string(fourFields, 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double(fourFields, 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    checkSampleLine(stringLine);
    checkSampleLine(charLine);
    checkSampleLine(dcharLine);
    checkSampleLine(wcharLine);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1302 
1303 /**
1304 Field-lists - A field-list is a string entered on the command line identifying one or more
1305 field numbers. They are used by the majority of the tsv utility applications. There are
1306 two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
1307 will use makeFieldListOptionHandler, it creates a delegate that can be passed to
1308 std.getopt to process the command option. Actual processing of the option text is done by
1309 parseFieldList. It can be called directly when the text of the option value contains more
1310 than just the field number.
1311 
1312 Syntax and behavior:
1313 
1314 A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1315 1-upped integers representing locations in an input line, in the traditional meaning of
1316 Unix command line tools. Fields can be entered as single numbers or a range. Multiple
1317 entries are separated by commas. Some examples (with 'fields' as the command line option):
1318 
1319    --fields 3                 // Single field
1320    --fields 4,1               // Two fields
1321    --fields 3-9               // A range, fields 3 to 9 inclusive
1322    --fields 1,2,7-34,11       // A mix of ranges and fields
1323    --fields 15-5,3-1          // Two ranges in reverse order.
1324 
1325 Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
1326 value by default, but can be enabled to support the notion of zero as representing the
1327 entire line. However, zero cannot be part of a range. Field numbers are one-based by
1328 default, but can be converted to zero-based. If conversion to zero-based is enabled, field
1329 number zero must be disallowed or a signed integer type specified for the returned range.
1330 
1331 An error is thrown if an invalid field specification is encountered. Error text is
1332 intended for display. Error conditions include:
1333   - Empty fields list
  - Empty value, e.g. two consecutive commas, a trailing comma, or a leading comma
1335   - String that does not parse as a valid integer
1336   - Negative integers, or zero if zero is disallowed.
1337   - An incomplete range
1338   - Zero used as part of a range.
1339 
1340 No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
1341 field numbers can be entered as well. Additional restrictions need to be applied by the
1342 caller.
1343 
1344 Notes:
1345   - The data type determines the max field number that can be entered. Enabling conversion
1346     to zero restricts to the signed version of the data type.
1347   - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
1348     allowFieldNumZero template parameters.
1349 */
1350 
/** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
 *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
 */
1354 alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";
1355 
/** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
 *  used by makeFieldListOptionHandler and parseFieldList.
 */
1359 alias AllowFieldNumZero = Flag!"allowFieldNumZero";
1360 
1361 alias OptionHandlerDelegate = void delegate(string option, string value);
1362 
/**
makeFieldListOptionHandler creates a std.getopt option handler for processing field
lists entered on the command line. A field list is as defined by parseFieldList.

The returned delegate parses the option value with parseFieldList and appends each
resulting field number to fieldsArray. Values from repeated uses of the option
accumulate in the array. If parsing throws, the exception message is prefixed with the
option name in the form "[--option] " and the exception is rethrown.

Params:
    fieldsArray = The array receiving the parsed field numbers.

Returns: A delegate suitable for passing to std.getopt as the option's handler.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
    T,
    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    /* Parses optionValue as a field-list, appending each field number to target. */
    void appendParsedFields(ref T[] target, string optionName, string optionValue) pure @safe
    {
        try
        {
            foreach (fieldNum; optionValue.parseFieldList!(T, convertToZero, allowZero))
            {
                target ~= fieldNum;
            }
        }
        catch (Exception e)
        {
            import std.format : format;

            /* Identify the offending command line option in the error text. */
            e.msg = format("[--%s] %s", optionName, e.msg);
            throw e;
        }
    }

    return (option, value) => appendParsedFields(fieldsArray, option, value);
}
1388 
// makeFieldListOptionHandler.
unittest
{
    import std.exception : assertThrown, assertNotThrown;
    import std.getopt;

    /* Default template arguments: one-based field numbers, zero disallowed. */
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }

    /* Field-list options used alongside a built-in getopt array option. */
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];

        /* arraySep is global getopt state. Restore it so other tests are unaffected. */
        string savedArraySep = std.getopt.arraySep;
        scope(exit) std.getopt.arraySep = savedArraySep;
        std.getopt.arraySep = ",";

        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    {
        uint[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }
    {
        ushort[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }

    /* Basic cases involving signed types. */
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        long[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        int[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        short[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }

    {
        /* Error cases. */
        size_t[] fields;

        /* Zero when zero is disallowed. */
        auto args = ["program", "-f", "0"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number. */
        args = ["program", "-f", "-1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Option name where a value is expected. */
        args = ["program", "-f", "--fields", "1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Non-integer values. */
        args = ["program", "-f", "a"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        args = ["program", "-f", "1.5"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Incomplete range. */
        args = ["program", "-f", "2-"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number within a list. */
        args = ["program", "-f", "3,5,-7"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Trailing comma. */
        args = ["program", "-f", "3,5,"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number remains an error when zero is allowed. */
        args = ["program", "-f", "-1"];
        assertThrown(getopt(args,
                            "f|fields", fields.makeFieldListOptionHandler!(
                                size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
    }
}
1545 
/**
parseFieldList turns a 'field-list' string, e.g. "1,3-5,9", into an input range of
field numbers.

The string is split on `delim`. Each entry is either a single field number or a range
such as "3-5", and is expanded by parseFieldRange. The first entry is parsed before the
range is returned; every later entry is parsed only when iteration reaches it. An empty
field-list is handed to parseFieldRange as an empty entry, which rejects it. Any
exception raised by parseFieldRange for an invalid entry propagates to the caller.

Template parameters:
$(LIST
    * `T` - Integral type of the field numbers produced.
    * `convertToZero` - When Yes, one is subtracted from every field number.
    * `allowZero` - When Yes, zero is accepted as a stand-alone field number.
)
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    /* Entries that have not been parsed yet. */
    auto pendingEntries = fieldList.splitter(delim);

    /* With nothing to split, an empty entry stands in for the first one so that
     * parseFieldRange is the one reporting the problem.
     */
    string firstEntry = "";
    if (!pendingEntries.empty)
    {
        firstEntry = pendingEntries.front;
        pendingEntries.popFront;
    }

    /* Expansion of the entry currently being iterated. */
    auto activeEntry = firstEntry.parseFieldRange!(T, convertToZero, allowZero);

    struct Result
    {
        /* The list is exhausted once the active entry has no values left; popFront
         * refills the active entry whenever more entries are pending.
         */
        @property bool empty() pure nothrow @safe @nogc
        {
            return activeEntry.empty;
        }

        @property T front() pure @safe
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");
            assert(!activeEntry.empty, "Internal error. Call to front with an empty _currFieldParse.");

            return activeEntry.front.to!T;
        }

        void popFront() pure @safe
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            activeEntry.popFront;

            /* Move to the next entry only when this one ran dry and another exists. */
            if (!activeEntry.empty || pendingEntries.empty) return;

            activeEntry = pendingEntries.front.parseFieldRange!(T, convertToZero, allowZero);
            pendingEntries.popFront;
        }
    }

    return Result();
}
1596 
// parseFieldList.
@safe unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A table row: the field-list text and the field numbers it should produce. */
    static struct Case
    {
        string fieldList;
        long[] expected;
    }

    /* Basic tests. */
    foreach (c; [Case("1", [1]),
                 Case("1,2", [1, 2]),
                 Case("1,2,3", [1, 2, 3]),
                 Case("1-2", [1, 2]),
                 Case("1-2,6-4", [1, 2, 6, 5, 4]),
                 Case("1-2,1,1-2,2,2-1", [1, 2, 1, 1, 2, 2, 2, 1])])
    {
        assert(c.fieldList.parseFieldList.equal(c.expected));
    }
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    foreach (c; [Case("1", [1]),
                 Case("1,2,3", [1, 2, 3]),
                 Case("1-2", [1, 2]),
                 Case("1-2,6-4", [1, 2, 6, 5, 4]),
                 Case("1-2,5", [1, 2, 5])])
    {
        assert(c.fieldList.parseFieldList!int.equal(c.expected));
    }

    /* Convert to zero tests. The same expectations hold for size_t and long. */
    foreach (c; [Case("1", [0]),
                 Case("1,2,3", [0, 1, 2]),
                 Case("1-2", [0, 1]),
                 Case("1-2,6-4", [0, 1, 5, 4, 3]),
                 Case("1-2,5", [0, 1, 4])])
    {
        assert(c.fieldList.parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal(c.expected));
        assert(c.fieldList.parseFieldList!(long, Yes.convertToZeroBasedIndex).equal(c.expected));
    }

    /* Allow zero tests. Without index conversion the values come back unchanged. */
    foreach (c; [Case("0", [0]),
                 Case("1,0,3", [1, 0, 3]),
                 Case("1-2,5", [1, 2, 5])])
    {
        assert(c.fieldList.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal(c.expected));
        assert(c.fieldList.parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal(c.expected));
    }

    /* Allow zero combined with index conversion: zero becomes -1. */
    foreach (c; [Case("0", [-1]),
                 Case("1,0,3", [0, -1, 2]),
                 Case("1-2,5", [0, 1, 4])])
    {
        assert(c.fieldList.parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal(c.expected));
    }

    /* Error cases. */
    foreach (bad; ["", " ", ",", "5 6", ",7", "8,", "8,9,", "10,,11"])
    {
        assertThrown(bad.parseFieldList.each);
    }

    foreach (bad; ["", "1,2-3,", "2-,4"])
    {
        assertThrown(bad.parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    }

    foreach (bad; ["1,2,3,,4", ",7", "8,", "10,0,,11"])
    {
        assertThrown(bad.parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    }

    assertThrown("8,9,".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);

    /* Zero is rejected unless explicitly allowed. */
    foreach (bad; ["0", "1,0,3"])
    {
        assertThrown(bad.parseFieldList.each);
        assertThrown(bad.parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    }

    /* Even when allowed, zero cannot be an endpoint of a range. */
    immutable string zeroInRanges = "0-2,6-0";
    assertThrown(zeroInRanges.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(zeroInRanges.parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(zeroInRanges.parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}
1669 
/* parseFieldRange expands one entry of a field-list: either a single field number
 * ('5') or a dash-separated range ('5-8', or '8-5' for descending order). The result
 * is an iota range over every field number in the entry, endpoints included.
 *
 * An exception is thrown if the entry is empty, is missing a number on either side
 * of the dash, does not convert to the integral type, or holds a number below the
 * allowed minimum (1 by default, 0 with allowZero). With allowZero, zero is accepted
 * only as a single value, never as a range endpoint. With convertToZero, one is
 * subtracted from both endpoints after validation.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.exception : enforce;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Endpoint type. The signed counterpart of T is needed when converting to
     * zero-based indices, since a descending range may then stop just before -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    enforce(fieldRange.length != 0, "Empty field number.");

    /* parts[0] is the text before the first dash, parts[1] the dash itself (empty
     * when there is none) and parts[2] everything after it.
     */
    auto parts = findSplit(fieldRange, "-");
    immutable bool hasDash = parts[1].length != 0;

    /* A dash needs a number on both sides. */
    enforce(!hasDash || (parts[0].length != 0 && parts[2].length != 0),
            format("Incomplete ranges are not supported: '%s'", fieldRange));

    S start = parts[0].to!S;
    S last = hasDash ? parts[2].to!S : start;
    Signed!T increment = (last < start) ? -1 : 1;

    static if (allowZero)
    {
        enforce(!hasDash || (start != 0 && last != 0),
                format("Zero cannot be used as part of a range: '%s'", fieldRange));

        enforce(start >= 0 && last >= 0,
                format("Field numbers must be non-negative integers: '%d'",
                       (start < 0) ? start : last));
    }
    else
    {
        enforce(start >= 1 && last >= 1,
                format("Field numbers must be greater than zero: '%d'",
                       (start < 1) ? start : last));
    }

    static if (convertToZero)
    {
        --start;
        --last;
    }

    /* iota excludes its end value, so step one past 'last' in the travel direction. */
    return iota(start, last + increment, increment);
}
1732 
// parseFieldRange.
@safe unittest
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A table row: one field-list entry and the values it should expand to. */
    static struct Case
    {
        string entry;
        long[] expected;
    }

    /* Basic cases */
    foreach (c; [Case("1", [1]),
                 Case("2", [2]),
                 Case("3-4", [3, 4]),
                 Case("3-5", [3, 4, 5]),
                 Case("4-3", [4, 3]),
                 Case("10-1", [10, 9, 8, 7, 6, 5, 4, 3, 2, 1])])
    {
        assert(c.entry.parseFieldRange.equal(c.expected));
    }

    /* Convert to zero-based indices */
    foreach (c; [Case("1", [0]),
                 Case("2", [1]),
                 Case("3-4", [2, 3]),
                 Case("3-5", [2, 3, 4]),
                 Case("4-3", [3, 2]),
                 Case("10-1", [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])])
    {
        assert(c.entry.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal(c.expected));
    }

    /* Allow zero. */
    foreach (c; [Case("0", [0]),
                 Case("1", [1]),
                 Case("3-4", [3, 4]),
                 Case("10-1", [10, 9, 8, 7, 6, 5, 4, 3, 2, 1])])
    {
        assert(c.entry.parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal(c.expected));
    }

    /* Allow zero, convert to zero-based index. */
    foreach (c; [Case("0", [-1]),
                 Case("1", [0]),
                 Case("3-4", [2, 3]),
                 Case("10-1", [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])])
    {
        assert(c.entry.parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal(c.expected));
    }

    /* Alternate integer types. Every type must yield the same values. */
    foreach (c; [Case("2", [2]),
                 Case("3-5", [3, 4, 5]),
                 Case("10-1", [10, 9, 8, 7, 6, 5, 4, 3, 2, 1])])
    {
        assert(c.entry.parseFieldRange!uint.equal(c.expected));
        assert(c.entry.parseFieldRange!int.equal(c.expected));
        assert(c.entry.parseFieldRange!ushort.equal(c.expected));
        assert(c.entry.parseFieldRange!short.equal(c.expected));
    }

    /* A lone zero with alternate integer types. */
    assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));

    /* Max field value cases. */
    assert("65535".parseFieldRange!ushort.equal([65535]));   // ushort max
    assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535]));
    assert("32767".parseFieldRange!short.equal([32767]));    // short max
    assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767]));
    assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766]));

    /* Error cases. */
    foreach (bad; ["", " ", "-", " -", "- ", "1-", "-2", "-1", "1.0", "0",
                   "0-3", "3-0", "-2-4", "2--4", "2-", "a", "0x3", "3U", "1_000", "."])
    {
        assertThrown(bad.parseFieldRange);
    }

    foreach (bad; ["", " ", "-", "1-", "-2", "-1", "0", "0-3", "3-0", "-2-4", "2--4"])
    {
        assertThrown(bad.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    }

    /* The same malformed entries are rejected by both allow-zero configurations. */
    foreach (bad; ["", " ", "-", "1-", "-2", "-1", "0-3", "3-0", "-2-4", "2--4"])
    {
        assertThrown(bad.parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assertThrown(bad.parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    }

    /* Value out of range cases. */
    assertThrown("65536".parseFieldRange!ushort);   // One more than ushort max.
    assertThrown("65535-65536".parseFieldRange!ushort);
    assertThrown("32768".parseFieldRange!short);    // One more than short max.
    assertThrown("32765-32768".parseFieldRange!short);
    // Convert to zero limits signed range.
    assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex));
    assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766]));
}
1861 
/** [Yes|No.newlineWasRemoved] is the template parameter of throwIfWindowsNewlineOnUnix.
 *  Yes means the Unix newline has already been stripped from the line, as happens when
 *  reading with std.stdio.File.byLine or a similar mechanism. No means the line still
 *  ends with its newline.
 */
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsNewlineOnUnix throws an Exception when `line` has a Windows/DOS line
ending and the program was compiled for a Posix platform. On other platforms it does
nothing. The TSV Utilities use it to detect Windows/DOS line endings and stop
processing with an error message for the user.

With `Yes.newlineWasRemoved` (the default) a line ending in a carriage return is
treated as a Windows/DOS line. With `No.newlineWasRemoved` the line must end in a
carriage return followed by a newline.

`filename` and `lineNum` are used only to build the error message. A filename of "-"
is reported as "Standard Input".
 */
void throwIfWindowsNewlineOnUnix
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    version(Posix)
    {
        /* The trailing characters that identify a Windows/DOS line. */
        static if (nlWasRemoved) enum string dosLineEnd = "\r";
        else enum string dosLineEnd = "\r\n";

        immutable bool hasWindowsLineEnding =
            line.length >= dosLineEnd.length &&
            line[$ - dosLineEnd.length .. $] == dosLineEnd;

        if (!hasWindowsLineEnding) return;

        import std.format : format;

        const(char)[] sourceName = (filename == "-") ? "Standard Input" : filename;
        throw new Exception(
            format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
                   sourceName, lineNum));
    }
}
1901 
// throwIfWindowsNewlineOnUnix
@safe unittest
{
    /* Note: Only Posix builds are exercised here. Non-Posix test cases need to be
     * added if Windows builds are ever done.
     */
    version(Posix)
    {
        import std.exception;

        /* Newline already removed (the default): only a trailing '\r' triggers. */
        foreach (i, line; ["", "a", "ab", "abc"])
        {
            assertNotThrown(throwIfWindowsNewlineOnUnix(line, "afile.tsv", i + 1));
        }

        foreach (i, line; ["\r", "a\r", "ab\r", "abc\r"])
        {
            assertThrown(throwIfWindowsNewlineOnUnix(line, "afile.tsv", i + 1));
        }

        /* Newline still present: only a trailing "\r\n" triggers. */
        foreach (i, line; ["\n", "a\n", "ab\n", "abc\n"])
        {
            assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(line, "afile.tsv", i + 1));
        }

        foreach (i, line; ["\r\n", "a\r\n", "ab\r\n", "abc\r\n"])
        {
            assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(line, "afile.tsv", i + 5));
        }

        /* Standard Input formatting. The message stays empty if nothing is thrown,
         * which makes the assertion after each try block fail.
         */
        import std.algorithm : endsWith;
        string caughtMsg;

        try
        {
            throwIfWindowsNewlineOnUnix("\r", "-", 99);
        }
        catch (Exception e)
        {
            caughtMsg = e.msg;
        }
        assert(caughtMsg.endsWith("File: Standard Input, Line: 99"));

        caughtMsg = null;

        try
        {
            throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99);
        }
        catch (Exception e)
        {
            caughtMsg = e.msg;
        }
        assert(caughtMsg.endsWith("File: Standard Input, Line: 99"));
    }
}
1961 
/** Flag used by InputSourceRange to determine if the header line should be read when
opening a file.
*/
1965 alias ReadHeader = Flag!"readHeader";
1966 
/**
inputSourceRange is a convenience function that allocates a new InputSourceRange for
`filepaths`, passing `readHeader` through to the InputSourceRange constructor.
*/
InputSourceRange inputSourceRange(string[] filepaths, ReadHeader readHeader)
{
    auto sources = new InputSourceRange(filepaths, readHeader);
    return sources;
}
1974 
/**
InputSourceRange is an input range that iterates over a set of input files.

It is used to walk the files passed on the command line. Files are opened and closed
automatically as iteration proceeds, and the caller can ask for the header line of each
file to be read when the file is opened.

The range is built from a list of filepaths. Each filepath is turned into an InputSource
object when iteration reaches it; that step is what opens the file and reads the header
line. The first file is opened when the range is constructed.

The motivation is to provide a standard way to look at the header line of the first
input file during command line argument processing, and then hand the open file and its
header line to the main processing functions. This enables features like named fields
to be implemented in a standard way.

Both InputSourceRange and InputSource are reference objects. This limits their use to a
single pass over the set of files. To iterate again, create a new InputSourceRange from
the same filepaths.

Currently, InputSourceRange supports files and standard input. It is possible other
types of input sources will be added in the future.
 */
final class InputSourceRange
{
    import std.range;

    private string[] _filepaths;     // Files not yet opened.
    private ReadHeader _readHeader;
    private InputSource _front;      // Currently open source; null when exhausted.

    /* Copies the filepath list and opens the first source, if there is one. */
    this(string[] filepaths, ReadHeader readHeader)
    {
        _filepaths = filepaths.dup;
        _readHeader = readHeader;
        _front = null;
        openNextSource();
    }

    /* Number of sources left, counting the one currently at the front. */
    size_t length() const pure nothrow @safe
    {
        if (empty) return 0;
        return _filepaths.length + 1;
    }

    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    InputSource front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty InputSourceRange");
        return _front;
    }

    /* Closes the current source and moves on to the next file, if any. */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty InputSourceRange");

        _front.close;
        openNextSource();
    }

    /* Makes the next pending filepath the front element and opens it. When no
     * filepaths remain the front is cleared, which marks the range as empty.
     */
    private void openNextSource()
    {
        if (_filepaths.length == 0)
        {
            _front = null;
            return;
        }

        _front = new InputSource(_filepaths[0], _readHeader);
        _front.open;
        _filepaths = _filepaths[1 .. $];
    }
}
2054 
/**
InputSource is the class of objects produced by iterating over an InputSourceRange.

An InputSource object gives access to the open file that is currently the front element
of an InputSourceRange. The methods application code is most likely to need are:

$(LIST
    * `file()` - Returns the File object. The file is open for reading as long as the
      InputSource instance is the front element of the InputSourceRange it came from.

    * `header(KeepTerminator keepTerminator = No.keepTerminator)` - Returns the
      header line from the file. An empty string is returned if the InputSourceRange
      was created with readHeader=false.

    * `name()` - The name of the input source, intended for user error messages. For
      files, this is the filepath that was passed to InputSourceRange. For standard
      input, it is "Standard Input".
)

An InputSource is a reference object, so copies share the state of the InputSourceRange
front element. In particular, all copies see the same open state.

This class is not intended for use outside the context of an InputSourceRange.
*/
final class InputSource
{
    import std.range;
    import std.stdio;

    private immutable string _filepath;
    private immutable bool _isStdin;     // True when the filepath is "-".
    private bool _isOpen;
    private ReadHeader _readHeader;
    private bool _hasBeenOpened;         // Set by open(); never reset.
    private string _header;              // Header line as read, terminator included.
    private File _file;

    /* Records the filepath and settings. Nothing is opened until open() is called. */
    private this(string filepath, ReadHeader readHeader) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = filepath == "-";
        _isOpen = false;
        _readHeader = readHeader;
        _hasBeenOpened = false;
    }

    /** file returns the File object held by the InputSource.
     *
     * The File is open for reading as long as the InputSource instance is the front
     * element of the InputSourceRange it came from.
     */
    File file() nothrow @safe
    {
        return _file;
    }

    /** isReadHeaderEnabled returns true if the header line is being read.
     */
    bool isReadHeaderEnabled() const pure nothrow @safe
    {
        return _readHeader == Yes.readHeader;
    }

    /** header returns the header line from the input file.
     *
     * A trailing newline is removed unless keepTerminator is Yes. An empty string is
     * returned if the InputSourceRange was created with readHeader=false.
     */
    string header(KeepTerminator keepTerminator = No.keepTerminator) const pure nothrow @safe
    {
        assert(_hasBeenOpened);

        immutable bool endsWithNewline = _header.length != 0 && _header[$ - 1] == '\n';

        if (keepTerminator != Yes.keepTerminator && endsWithNewline)
        {
            return _header[0 .. $ - 1];
        }

        return _header;
    }

    /** isHeaderEmpty returns true if there is no data for a header, including the
     * terminator.
     *
     * When headers are being read, this is true only if the file is empty.
     */
    bool isHeaderEmpty() const pure nothrow @safe
    {
        assert(_hasBeenOpened);
        return _header.length == 0;
    }

    /** name returns a user friendly name representing the input source.
     *
     * For files, it is the filepath provided to InputSourceRange. For standard
     * input, it is "Standard Input". (Use isStdin() to test for standard input,
     * not name().)
     */
    string name() const pure nothrow @safe
    {
        if (_isStdin) return "Standard Input";
        return _filepath;
    }

    /** isStdin returns true if the input source is Standard Input, false otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the input source is open for reading, false otherwise.
     *
     * "Open" here refers to the InputSource object itself, meaning that it is the
     * front element of the InputSourceRange that created it.
     *
     * For files, this is also reflected in the state of the underlying File object.
     * However, standard input is never actually closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the source for reading and, if enabled, reads the header line. May be
     * called only once per InputSource.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        if (isStdin) _file = stdin;
        else _file = File(_filepath, "rb");

        if (_readHeader) _header = _file.readln;

        _isOpen = true;
        _hasBeenOpened = true;
    }

    /* Marks the source as closed. The underlying file is closed as well, except for
     * standard input, which is left open.
     */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
2192 
2193 // InputSourceRange and InputSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    /* Test file contents, indexed by file number. File 0 is completely empty,
     * file 1 has a header line only, files 2 and 3 have a header line followed
     * by one and two data lines respectively.
     */
    string[] fileHeaders = ["", "file 1 header\n", "file 2 header\n", "file 3 header\n"];
    string[] fileBodies = ["", "", "file 2 line 1\n", "file 3 line 1\nfile 3 line 2\n"];

    string[] inputFiles;
    string[] fileData;

    foreach (fileNum, fileName; ["file0.txt", "file1.txt", "file2.txt", "file3.txt"])
    {
        inputFiles ~= buildPath(testDir, fileName);
        fileData ~= fileHeaders[fileNum] ~ fileBodies[fileNum];
        inputFiles[fileNum].File("w").write(fileData[fileNum]);
    }

    auto readSources = appender!(InputSource[]);
    auto buffer = new char[1024];    // Must be large enough to hold the test files.

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    foreach (numFiles; 1 .. inputFiles.length + 1)
    {
        auto filesUnderTest = inputFiles[0 .. numFiles];

        /* First pass: header reading enabled. */

        readSources.clear;
        auto sourcesWithHeader = inputSourceRange(filesUnderTest, Yes.readHeader);
        assert(sourcesWithHeader.length == numFiles);

        foreach (fileNum, source; sourcesWithHeader.enumerate)
        {
            readSources.put(source);

            /* Only the current source is open; all previously seen ones are closed. */
            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            string expectedHeader = fileHeaders[fileNum];
            assert(source.header(Yes.keepTerminator) == expectedHeader);

            /* Without the terminator the header is one character shorter, unless
             * it was empty to begin with.
             */
            string expectedHeaderNoTerminator =
                expectedHeader.empty ? expectedHeader : expectedHeader[0 .. $ - 1];
            assert(source.header(No.keepTerminator) == expectedHeaderNoTerminator);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(source.isReadHeaderEnabled);

            /* The header was already consumed, so only the body remains to be read. */
            assert(source.file.rawRead(buffer) == fileBodies[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(sourcesWithHeader.empty);

        /* Second pass: header reading disabled. */

        readSources.clear;
        auto sourcesWithoutHeader = inputSourceRange(filesUnderTest, No.readHeader);
        assert(sourcesWithoutHeader.length == numFiles);

        foreach (fileNum, source; sourcesWithoutHeader.enumerate)
        {
            readSources.put(source);

            assert(source.isOpen);
            assert(source.file.isOpen);
            assert(readSources.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSources.data[fileNum].isOpen);

            /* No header is captured in this mode. */
            assert(source.header(Yes.keepTerminator).empty);
            assert(source.header(No.keepTerminator).empty);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);
            assert(!source.isReadHeaderEnabled);

            /* Nothing was consumed, so the entire file content is still available. */
            assert(source.file.rawRead(buffer) == fileData[fileNum]);
        }

        /* The InputSourceRange is a reference range, consumed by the foreach. */
        assert(sourcesWithoutHeader.empty);
    }

    /* Tests with standard input. No actual reading in these tests.
     */

    readSources.clear;
    auto stdinSources = inputSourceRange(["-", "-"], No.readHeader);
    foreach (fileNum, source; stdinSources.enumerate)
    {
        readSources.put(source);

        assert(source.isOpen);
        assert(source.file.isOpen);

        auto earlierSources = readSources.data[0 .. fileNum];
        assert(earlierSources.all!(s => !s.isOpen));      // InputSource objects are "closed".
        assert(earlierSources.all!(s => s.file.isOpen));  // Actual stdin should not be closed.
        assert(readSources.data[fileNum].isOpen);

        assert(source.header(Yes.keepTerminator).empty);
        assert(source.header(No.keepTerminator).empty);

        assert(source.name == "Standard Input");
        assert(source.isStdin);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto emptyNoHeader = inputSourceRange(nofiles, No.readHeader);
        assert(emptyNoHeader.empty);
    }
    {
        auto emptyYesHeader = inputSourceRange(nofiles, Yes.readHeader);
        assert(emptyYesHeader.empty);
    }

    /* Error cases: a missing file anywhere in the list must raise. */
    assertThrown(inputSourceRange([inputFiles[0], "no_such_file.txt"], No.readHeader).each);
    assertThrown(inputSourceRange(["no_such_file.txt", inputFiles[1]], Yes.readHeader).each);
}
2343 
/**
byLineSourceRange is a helper function for creating new ByLineSourceRange objects.

The template parameters are passed through unchanged to ByLineSourceRange, which
is constructed from the given filepaths.
*/
auto byLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
(string[] filepaths)
if (is(Char == char) || is(Char == ubyte))
{
    alias SourceRangeType = ByLineSourceRange!(keepTerminator, Char, terminator);

    return new SourceRangeType(filepaths);
}
2354 
2355 /**
2356 ByLineSourceRange is an input range that iterates over a set of input files. It
2357 provides bufferedByLine access to each file.
2358 
2359 A ByLineSourceRange is used to iterate over a set of files passed on the command line.
2360 Files are automatically opened and closed during iteration. The front element of the
2361 range provides access to a bufferedByLine for iterating over the lines in the file.
2362 
2363 The range is created from a set of filepaths. These filepaths are mapped to
2364 ByLineSource objects during the iteration. This is what enables automatically opening
2365 and closing files and providing bufferedByLine access.
2366 
2367 The motivation behind ByLineSourceRange is to provide a standard way to look at the
2368 header line of the first input file during command line argument processing, and then
2369 pass the open input file along to the main processing functions. This enables
2370 features like named fields to be implemented in a standard way.
2371 
2372 Access to the first line of the first file is available after creating the
2373 ByLineSourceRange instance. The first file is opened and a bufferedByLine created.
The first line of the first file is available via byLine.front (after checking !byLine.empty).
2375 
2376 Both ByLineSourceRange and ByLineSource are reference objects. This keeps their use
2377 limited to a single iteration over the set of files. The files can be iterated again
by creating a new ByLineSourceRange against the same filepaths.
2379 
2380 Currently, ByLineSourceRange supports files and standard input. It is possible other
2381 types of input sources will be added in the future.
2382  */
final class ByLineSourceRange(
    KeepTerminator keepTerminator = No.keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.range;

    /** Type of the elements produced by this range.
     *
     * All template parameters, including Char, are forwarded to ByLineSource.
     * (Char was previously hard-coded to char here, so a ubyte instantiation
     * was silently ignored.)
     */
    alias ByLineSourceType = ByLineSource!(keepTerminator, Char, terminator);

    private string[] _filepaths;       // Filepaths that have not been opened yet.
    private ByLineSourceType _front;   // Current front element; null when the range is empty.

    /** Constructs the range over a private copy of filepaths.
     *
     * If at least one filepath is given, the first one is opened immediately and
     * becomes the front element. Otherwise the range starts out empty.
     */
    this(string[] filepaths)
    {
        _filepaths = filepaths.dup;
        _front = null;
        openNextSource();
    }

    /** length returns the number of sources remaining, including the current front
     * element.
     */
    size_t length() const pure nothrow @safe
    {
        return empty ? 0 : _filepaths.length + 1;
    }

    /** empty returns true when there is no front element left. */
    bool empty() const pure nothrow @safe
    {
        return _front is null;
    }

    /** front returns the currently open ByLineSource. The range must not be empty. */
    ByLineSourceType front() pure @safe
    {
        assert(!empty, "Attempt to take the front of an empty ByLineSourceRange");
        return _front;
    }

    /** popFront closes the current front element and opens the next filepath, if
     * there is one. The range becomes empty when no filepaths remain.
     */
    void popFront()
    {
        assert(!empty, "Attempt to popFront an empty ByLineSourceRange");

        _front.close;
        openNextSource();
    }

    /* Makes the next pending filepath the front element and opens it, or sets the
     * front element to null when no filepaths remain. The filepath is removed from
     * the pending list only after the open call has returned.
     */
    private void openNextSource()
    {
        if (_filepaths.empty)
        {
            _front = null;
            return;
        }

        _front = new ByLineSourceType(_filepaths.front);
        _front.open;
        _filepaths.popFront;
    }
}
2441 
2442 /**
ByLineSource is a class of objects produced by iterating over a ByLineSourceRange.
2444 
A ByLineSource instance provides a bufferedByLine range for the current front
2446 element of a ByLineSourceRange. The main methods application code is likely to
2447 need are:
2448 
2449 $(LIST
2450     * `byLine()` - Returns the bufferedByLine range accessing the open file. The file
2451        will be open for reading (using the bufferedByLine range) as long as the
2452        ByLineSource instance is the front element of the ByLineSourceRange
2453        it came from.
2454 
2455     * `name()` - The name of the input source. The name returned is intended for
2456       user error messages. For files, this is the filepath that was passed to
2457       ByLineSourceRange. For standard input, it is "Standard Input".
2458 )
2459 
2460 A ByLineSource is a reference object, so the copies have the same state as the
2461 ByLineSourceRange front element. In particular, all copies will have the open
2462 state of the front element of the ByLineSourceRange.
2463 
This class is not intended for use outside the context of a ByLineSourceRange.
2465 */
final class ByLineSource(
    KeepTerminator keepTerminator, Char = char, ubyte terminator = '\n')
if (is(Char == char) || is(Char == ubyte))
{
    import std.range;
    import std.stdio;
    import std.traits : ReturnType;

    /* Char is forwarded to bufferedByLine so that the line range matches the
     * character type this class was instantiated with. (It was previously
     * hard-coded to char, which made the Char template parameter a no-op.)
     */
    alias newByLineFn = bufferedByLine!(keepTerminator, Char, terminator);
    alias ByLineType = ReturnType!newByLineFn;

    private immutable string _filepath;
    private immutable bool _isStdin;      // True when the filepath is "-".
    private bool _isOpen;
    private bool _hasBeenOpened;          // Guards against opening a source twice.
    private File _file;
    private ByLineType _byLineRange;

    /* Records the filepath; nothing is opened until open() is called. A filepath
     * of "-" denotes standard input.
     */
    private this(string filepath) pure nothrow @safe
    {
        _filepath = filepath;
        _isStdin = filepath == "-";
        _isOpen = false;
        _hasBeenOpened = false;
    }

    /** byLine returns the bufferedByLine object held by the ByLineSource instance.
     *
     * The File underlying the bufferedByLine object is open for reading as long as
     * the ByLineSource instance is the front element of the ByLineSourceRange it
     * came from.
     */
    ByLineType byLine() nothrow @safe
    {
        return _byLineRange;
    }

    /** name returns a user friendly name representing the underlying input source.
     *
     * For files, it is the filepath provided to ByLineSourceRange. For standard
     * input, it is "Standard Input". (To test for standard input, use isStdin()
     * rather than comparing against name().)
     */
    string name() const pure nothrow @safe
    {
        return _isStdin ? "Standard Input" : _filepath;
    }

    /** isStdin returns true if the underlying input source is Standard Input, false
     * otherwise.
     */
    bool isStdin() const pure nothrow @safe
    {
        return _isStdin;
    }

    /** isOpen returns true if the ByLineSource instance is open for reading, false
     * otherwise.
     *
     * "Open" in this context is whether the ByLineSource object is currently "open".
     * The underlying input source backing it does not necessarily have the same
     * state. The ByLineSource instance is "open" if it is the front element of the
     * ByLineSourceRange that created it.
     *
     * The underlying input source object follows the same open/close state where
     * that makes sense. In particular, real files are closed when the ByLineSource
     * object is closed. The exception is standard input, which is never actually
     * closed.
     */
    bool isOpen() const pure nothrow @safe
    {
        return _isOpen;
    }

    /* Opens the underlying input source and creates the bufferedByLine range on it.
     * May be called only once per instance.
     */
    private void open()
    {
        assert(!_isOpen);
        assert(!_hasBeenOpened);

        _file = isStdin ? stdin : _filepath.File("rb");
        _byLineRange = newByLineFn(_file);
        _isOpen = true;
        _hasBeenOpened = true;
    }

    /* Marks the instance closed. Real files are closed as well; standard input is
     * left open.
     */
    private void close()
    {
        if (!_isStdin) _file.close;
        _isOpen = false;
    }
}
2556 
2557 // ByLineSourceRange and ByLineSource
unittest
{
    import std.algorithm : all, each;
    import std.array : appender;
    import std.exception : assertThrown;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_byline_input_source_range");
    scope(exit) testDir.rmdirRecurse;

    string file0 = buildPath(testDir, "file0.txt");
    string file1 = buildPath(testDir, "file1.txt");
    string file2 = buildPath(testDir, "file2.txt");
    string file3 = buildPath(testDir, "file3.txt");

    /* File 0 is completely empty, file 1 has a header line only, files 2 and 3
     * have a header line followed by data lines.
     */
    string file0Header = "";
    string file1Header = "file 1 header\n";
    string file2Header = "file 2 header\n";
    string file3Header = "file 3 header\n";

    string file0Body = "";
    string file1Body = "";
    string file2Body = "file 2 line 1\n";
    string file3Body = "file 3 line 1\nfile 3 line 2\n";

    string file0Data = file0Header ~ file0Body;
    string file1Data = file1Header ~ file1Body;
    string file2Data = file2Header ~ file2Body;
    string file3Data = file3Header ~ file3Body;

    {
        file0.File("w").write(file0Data);
        file1.File("w").write(file1Data);
        file2.File("w").write(file2Data);
        file3.File("w").write(file3Data);
    }

    /* Parallel arrays indexed by file number. The header of each file doubles as
     * its expected first line.
     */
    auto inputFiles = [file0, file1, file2, file3];
    auto fileHeaders = [file0Header, file1Header, file2Header, file3Header];
    auto fileData = [file0Data, file1Data, file2Data, file3Data];

    /* Tests without standard input. Don't want to count on state of standard
     * input or modifying it when doing unit tests, so avoid reading from it.
     */

    auto readSourcesNoTerminator = appender!(ByLineSource!(No.keepTerminator)[]);
    auto readSourcesYesTerminator = appender!(ByLineSource!(Yes.keepTerminator)[]);

    foreach(numFiles; 1 .. inputFiles.length + 1)
    {
        /* Using No.keepTerminator. */
        readSourcesNoTerminator.clear;
        auto inputSourcesNoTerminator = byLineSourceRange!(No.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesNoTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesNoTerminator.enumerate)
        {
            readSourcesNoTerminator.put(source);

            /* Only the current source is open; all previously seen ones are closed. */
            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesNoTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesNoTerminator.data[fileNum].isOpen);

            /* The expected first line is the header with its terminator removed. */
            auto headerNoTerminatorLength = fileHeaders[fileNum].length;
            if (headerNoTerminatorLength > 0) --headerNoTerminatorLength;

            assert(source.byLine.empty ||
                   source.byLine.front == fileHeaders[fileNum][0 .. headerNoTerminatorLength]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Terminators are not part of the lines in this mode, so add them back
             * before comparing against the original file content.
             */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
                readFileData.put('\n');
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesNoTerminator.empty);

        /* Using Yes.keepTerminator. */
        readSourcesYesTerminator.clear;
        auto inputSourcesYesTerminator = byLineSourceRange!(Yes.keepTerminator)(inputFiles[0 .. numFiles]);
        assert(inputSourcesYesTerminator.length == numFiles);

        foreach(fileNum, source; inputSourcesYesTerminator.enumerate)
        {
            readSourcesYesTerminator.put(source);

            assert(source.isOpen);
            assert(source._file.isOpen);
            assert(readSourcesYesTerminator.data[0 .. fileNum].all!(s => !s.isOpen));
            assert(readSourcesYesTerminator.data[fileNum].isOpen);

            /* With terminators kept, the first line equals the header as written. */
            assert(source.byLine.empty || source.byLine.front == fileHeaders[fileNum]);

            assert(source.name == inputFiles[fileNum]);
            assert(!source.isStdin);

            /* Lines already carry their terminators; concatenating them must
             * reproduce the file content.
             */
            auto readFileData = appender!(char[]);
            foreach(line; source.byLine)
            {
                readFileData.put(line);
            }

            assert(readFileData.data == fileData[fileNum]);
        }

        /* The ByLineSourceRange is a reference range, consumed by the foreach. */
        assert(inputSourcesYesTerminator.empty);
    }

    /* Empty filelist. */
    string[] nofiles;
    {
        auto sources = byLineSourceRange!(No.keepTerminator)(nofiles);
        assert(sources.empty);
    }
    {
        auto sources = byLineSourceRange!(Yes.keepTerminator)(nofiles);
        assert(sources.empty);
    }

    /* Error cases: a missing file anywhere in the list must raise. */
    assertThrown(byLineSourceRange!(No.keepTerminator)([file0, "no_such_file.txt"]).each);
    assertThrown(byLineSourceRange!(Yes.keepTerminator)(["no_such_file.txt", file1]).each);
}