1 /**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
      an input line. Fields in the subset are accessed by array indices. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [bufferedByLine] - An input range that reads from a File handle line by line.
16       It is similar to the standard library method std.stdio.File.byLine, but quite a
17       bit faster. This is achieved by reading in larger blocks and buffering.
18 
19     * [joinAppend] - A function that performs a join, but appending the join output to
20       an output stream. It is a performance improvement over using join or joiner with
21       writeln.
22 
23     * [getTsvFieldValue] - A convenience function when only a single value is needed from
24       an input line.
25 
26     * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
27       parsing field-lists entered on the command line.
28 
    * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds that detects
      Windows newlines in input.
31 )
32 
33 Copyright (c) 2015-2020, eBay Inc.
34 Initially written by Jon Degenhardt
35 
36 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
37 */
38 
39 module tsv_utils.common.utils;
40 
41 import std.range;
42 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
43 import std.typecons : Flag, No, Yes;
44 
// InputFieldReordering class.
46 
/** Flag used by the InputFieldReordering template to enable partial line support. */
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;      // Reordered fields; slices of caller-owned data.
    private TupleFromTo[] fromToMap;    // Full input-to-output map, sorted by input index.
    private TupleFromTo[] mapStack;     // Portion of fromToMap not yet matched on this line.

    /** Builds the input-to-output mapping.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = Output position given to the first listed field. Later fields get
     *           consecutive positions. Output positions below start are never filled
     *           and remain empty.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* The buffer must cover every output position produced below. Those run from
         * start up to start + length - 1, so a buffer of only 'length' entries would
         * be indexed out of bounds for any non-zero start.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Tuples sort by 'from' first, then 'to', giving input field order. */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* Partial lines: clear every slot so unfilled fields read as empty. */
            foreach (ref field; outputFieldsBuf) field.length = 0;
        }
    }

    /** processNextField maps an input field to the correct locations in the
     * outputFields array.
     *
     * processNextField should be called once for each field on the line, in the order
     * found. The processing of the line can terminate once allFieldsFilled returns
     * true.
     *
     * The return value is the number of output fields the input field maps to. Zero
     * means the field is not mapped to the output fields array.
     *
     * If, prior to allFieldsFilled returning true, any fields on the input line
     * are not passed to processNextField, the caller should either ensure the fields
     * are not part of the output fields or have partial lines enabled.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;

        /* A field listed several times has several consecutive map entries. */
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
193 
/* InputFieldReordering with char, wchar, and dchar fields. */
@safe unittest
{
    import std.conv : to;

    size_t[] wantedFields = [2, 0];

    auto rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                 ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                 ["r3f0", "123",  "456",    "789"]];

    auto expectedRows = [["r1f2",   "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456",    "r3f0"]];

    /* Converted only; no reorderer is compared against the dstring form. */
    dstring[][] expectedAsDstrings = to!(dstring[][])(expectedRows);

    /* Runs every row through a reorderer for character type C. Field 2 is the
     * last field needed, so the reorderer is complete from field index 2 onward.
     */
    void checkCharType(C)()
    {
        C[][][] expected = to!(C[][][])(expectedRows);
        auto reorderer = new InputFieldReordering!C(wantedFields);

        foreach (rowNum, row; rows)
        {
            reorderer.initNewLine;

            foreach (fieldNum, value; row)
            {
                reorderer.processNextField(fieldNum, to!(C[])(value));
                assert (reorderer.allFieldsFilled == (fieldNum >= 2));
            }

            assert(reorderer.allFieldsFilled);
            assert(reorderer.outputFields == expected[rowNum]);
        }
    }

    checkCharType!char();
    checkCharType!wchar();
    checkCharType!dchar();
}
243 
/* Partial line support: outputFields is checked after every field processed. */
@safe unittest
{
    import std.conv : to;

    size_t[] wantedFields = [2, 0];

    auto rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                 ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                 ["r3f0", "123",  "456",    "789"]];

    /* snapshots[row][field] is the expected outputFields content immediately after
     * that field of that row has been processed.
     */
    char[][][][] snapshots = to!(char[][][][])(
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ]);

    auto reorderer = new InputFieldReordering!(char, EnablePartialLines.yes)(wantedFields);

    foreach (rowNum, row; rows)
    {
        reorderer.initNewLine;

        foreach (fieldNum, value; row)
        {
            reorderer.processNextField(fieldNum, value.to!(char[]));
            assert(reorderer.outputFields == snapshots[rowNum][fieldNum]);
        }
    }
}
277 
/* Field combination tests, driven from a table of scenarios. */
@safe unittest
{
    import std.conv : to;

    alias Reorderer = InputFieldReordering!char;

    /* One scenario: the field list and the expected output for each input line. */
    static struct Scenario
    {
        size_t[] fields;
        string[][] expected;
    }

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    Scenario[] scenarios = [
        Scenario([0],
                 [["00"],
                  ["10"],
                  ["20"]]),
        Scenario([3],
                 [["03"],
                  ["13"],
                  ["23"]]),
        Scenario([0, 1],
                 [["00", "01"],
                  ["10", "11"],
                  ["20", "21"]]),
        Scenario([1, 0],
                 [["01", "00"],
                  ["11", "10"],
                  ["21", "20"]]),
        Scenario([0, 3],
                 [["00", "03"],
                  ["10", "13"],
                  ["20", "23"]]),
        Scenario([3, 0],
                 [["03", "00"],
                  ["13", "10"],
                  ["23", "20"]]),
        Scenario([0, 1, 2, 3],
                 [["00", "01", "02", "03"],
                  ["10", "11", "12", "13"],
                  ["20", "21", "22", "23"]]),
        Scenario([3, 2, 1, 0],
                 [["03", "02", "01", "00"],
                  ["13", "12", "11", "10"],
                  ["23", "22", "21", "20"]]),
        Scenario([0, 3, 0, 0, 1],
                 [["00", "03", "00", "00", "01"],
                  ["10", "13", "10", "10", "11"],
                  ["20", "23", "20", "20", "21"]]),
    ];

    /* One reorderer per scenario, kept in scenario order. */
    Reorderer[] reorderers;
    foreach (ref scenario; scenarios)
    {
        reorderers ~= new Reorderer(scenario.fields);
    }

    foreach (lineNum, line; inputLines)
    {
        foreach (reorderer; reorderers) reorderer.initNewLine;

        /* Every reorderer sees every field of the line, each with its own copy. */
        foreach (fieldNum, value; line)
        {
            foreach (reorderer; reorderers)
            {
                reorderer.processNextField(fieldNum, to!(char[])(value));
            }
        }

        foreach (i, reorderer; reorderers)
        {
            char[][] expectedFields = to!(char[][])(scenarios[i].expected[lineNum]);
            assert(reorderer.outputFields == expectedFields);
        }
    }
}
380 
381 
382 import std.stdio : File, isFileHandle, KeepTerminator;
383 import std.range : isOutputRange;
384 import std.traits : Unqual;
385 
386 /**
387 BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
389 internal buffer and written to the output stream as a block.
390 
391 Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
393 lines, as it blocks many writes together in a single write.
394 
395 The internal buffer is written to the output stream after flushSize has been reached.
396 This is checked at newline boundaries, when appendln is called or when put is called
397 with a single newline character. Other writes check maxSize, which is used to avoid
398 runaway buffers.
399 
400 
BufferedOutputRange has a put method allowing it to be used as a range. It has a number
402 of other methods providing additional control.
403 
404 $(LIST
405     * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
406       output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.
407 
408     * `append(stuff)` - Append to the internal buffer.
409 
    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.
415 
416     * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
417       For reasons that are not clear, joiner is quite slow.
418 
419     * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
420       reached.
421 
422     * `flush()` - Write the internal buffer to the output stream.
423 
424     * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
425       newline character, '\n' or "\n".
426 )
427 
428 The internal buffer is automatically flushed when the BufferedOutputRange goes out of
429 scope.
430 */
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : ElementType, isInputRange, isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered writer on top of outputTarget.
     *
     * Params:
     *   outputTarget = The File or output range that receives flushed data.
     *   flushSize = Buffer size at which a flush happens at the next newline boundary.
     *   reserveSize = Initial capacity reserved for the internal buffer.
     *   maxSize = Buffer size at which a flush happens regardless of newlines.
     *             Expected to be at least flushSize; this is asserted, and a smaller
     *             value is raised to flushSize.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    @safe
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Writes any buffered data when the object goes out of scope. */
    ~this() @safe
    {
        flush();
    }

    /** Writes the internal buffer to the output target and empties the buffer. */
    void flush() @safe
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull() @safe
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize() @safe
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * Returns true if a flush occurred.
     */
    private bool maybeFlush() @safe
    {
        immutable size_t bufferLen = _outputBuffer.data.length;

        /* Only look at the last element when there is one. With a flushSize of zero
         * an empty buffer satisfies the size test, and indexing [$-1] on it would be
         * out of bounds.
         */
        immutable bool endsWithNewline =
            bufferLen > 0 && _outputBuffer.data[$ - 1] == '\n';

        immutable bool doFlush =
            bufferLen >= _flushSize && (endsWithNewline || bufferLen >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }

    /* Appends to the internal buffer without any flush check. */
    private void appendRaw(T)(T stuff) pure @safe
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer, then flushes if the buffer has reached
     * flushSize and ends in a newline, or has reached maxSize.
     */
    void append(T)(T stuff) @safe
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if the buffer has reached flushSize.
     * Returns true if a flush occurred.
     */
    bool appendln() @safe
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if the buffer has reached
     * flushSize. Returns true if a flush occurred.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     *
     * The buffer is checked against maxSize once, after all elements are appended.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range.
     *
     * A single newline, as a character or a string, acts as appendln(). Any other
     * input is appended. A character append checks maxSize; other appends go
     * through append() and its flush rules.
     */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else
            {
                appendRaw(stuff);

                /* Keep character-at-a-time output from growing the buffer without
                 * bound when no newline is ever written.
                 */
                flushIfMaxSize();
            }
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
578 
/* BufferedOutputRange tests. Covers File, lockingTextWriter, and Appender targets,
 * flushSize and maxSize boundaries, joinAppend, and use as an output range.
 */
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.file : rmdirRecurse, readText;
    import std.path : buildPath;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    import std.algorithm : map, joiner;
    import std.range : iota;
    import std.conv : to;

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        ostream.append("file1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln(100.to!string);
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath1.readText == "file1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* Test with no reserve and a flush at every line (flushSize and reserveSize
     * are both zero).
     */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        import std.stdio : File;

        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        ostream.append("file2: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(filepath2.readText == "file2: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With a locking text writer. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            import std.stdio : File;

            auto ltw = filepath3.File("w").lockingTextWriter;
            {
                auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
                ostream.append("file3: ");
                ostream.append("abc");
                ostream.append(["def", "ghi", "jkl"]);
                ostream.appendln("100");
                ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
                ostream.appendln();
            }
        }
        assert(filepath3.readText == "file3: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");
    }

    /* With an Appender. */
    import std.array : appender;
    auto app1 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1))(app1);
        ostream.append("appender1: ");
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }
    assert(app1.data == "appender1: abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n");

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
        bool wasFlushed = false;

        assert(app2.data == "");

        ostream.append("12345678"); // Not flushed yet.
        assert(app2.data == "");

        wasFlushed = ostream.appendln;  // Ninth char, not flushed yet.
        assert(!wasFlushed);
        assert(app2.data == "");

        wasFlushed = ostream.appendln;  // Tenth char, now flushed.
        assert(wasFlushed);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        ostream.append("12345678");

        // Eight chars buffered, below the flush size of 10.
        wasFlushed = ostream.flushIfFull;
        assert(!wasFlushed);
        assert(app2.data == "");

        // An explicit flush writes regardless of size.
        ostream.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        // Past the flush size, but no trailing newline, so append does not flush.
        ostream.append("123456789012345");
        assert(app2.data == "");
    }
    // Leaving the scope flushed the remaining data.
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
        ostream.append("appenderB: ");
        ostream.joinAppend(["a", "bc", "def"], '-');
        ostream.append(':');
        ostream.joinAppend(["g", "hi", "jkl"], '-');
        ostream.appendln("*100*");
        ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        ostream.append(' ');
        ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        ostream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    void outputStuffAsRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    void outputStuffAsVal(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    /* By ref with a flush size of 12: the first call leaves 10 chars buffered. The
     * second call flushes at its first newline, when 14 chars are buffered.
     */
    auto app3 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        outputStuffAsRef(ostream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        outputStuffAsRef(ostream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    /* By value: everything written has been flushed once each call returns. */
    auto app4 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        ostream.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        ostream.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        ostream.put("1234567");     // Not flushed yet (no newline).
        assert(app5.data == "123456789012");

        ostream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
786 
787 /**
788 bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
789 reading a large buffer from the input stream rather than just a single line.
790 
791 The file argument needs to be a File object open for reading, typically a filesystem
792 file or standard input. Use the Yes.keepTerminator template parameter to keep the
newline. This is similar to stdio.File.byLine, except specified as a template parameter
794 rather than a runtime parameter.
795 
796 Reading in blocks does mean that input is not read until a full buffer is available or
797 end-of-file is reached. For this reason, bufferedByLine is not appropriate for
798 interactive input.
799 */
800 
801 auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
802                     ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
803     (File file)
804 if (is(Char == char) || is(Char == ubyte))
805 {
806     static assert(0 < growSize && growSize <= readSize);
807 
808     static final class BufferedByLineImpl
809     {
810         /* Buffer state variables
811          *   - _buffer.length - Full length of allocated buffer.
812          *   - _dataEnd - End of currently valid data (end of last read).
813          *   - _lineStart - Start of current line.
814          *   - _lineEnd - End of current line.
815          */
816         private File _file;
817         private ubyte[] _buffer;
818         private size_t _lineStart = 0;
819         private size_t _lineEnd = 0;
820         private size_t _dataEnd = 0;
821 
822         this (File f) @safe
823         {
824             _file = f;
825             _buffer = new ubyte[readSize + growSize];
826         }
827 
828         bool empty() const pure @safe
829         {
830             return _file.eof && _lineStart == _dataEnd;
831         }
832 
833         Char[] front()  pure @safe
834         {
835             assert(!empty, "Attempt to take the front of an empty bufferedByLine.");
836 
837             static if (keepTerminator == Yes.keepTerminator)
838             {
839                 return cast(Char[]) _buffer[_lineStart .. _lineEnd];
840             }
841             else
842             {
843                 assert(_lineStart < _lineEnd);
844                 immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
845                 return cast(Char[]) _buffer[_lineStart .. end];
846             }
847         }
848 
849         /* Note: Call popFront at initialization to do the initial read. */
850         void popFront() @safe
851         {
852             import std.algorithm: copy, find;
853             assert(!empty, "Attempt to popFront an empty bufferedByLine.");
854 
855             /* Pop the current line. */
856             _lineStart = _lineEnd;
857 
858             /* Set up the next line if more data is available, either in the buffer or
859              * the file. The next line ends at the next newline, if there is one.
860              *
861              * Notes:
862              * - 'find' returns the slice starting with the character searched for, or
863              *   an empty range if not found.
864              * - _lineEnd is set to _dataEnd both when the current buffer does not have
865              *   a newline and when it ends with one.
866              */
867             auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
868             _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
869 
870             if (found.empty && !_file.eof)
871             {
872                 /* No newline in current buffer. Read from the file until the next
873                  * newline is found.
874                  */
875                 assert(_lineEnd == _dataEnd);
876 
877                 if (_lineStart > 0)
878                 {
879                     /* Move remaining data to the start of the buffer. */
880                     immutable remainingLength = _dataEnd - _lineStart;
881                     copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
882                     _lineStart = 0;
883                     _lineEnd = _dataEnd = remainingLength;
884                 }
885 
886                 do
887                 {
888                     /* Grow the buffer if necessary. */
889                     immutable availableSize = _buffer.length - _dataEnd;
890                     if (availableSize < readSize)
891                     {
892                         size_t growBy = growSize;
893                         while (availableSize + growBy < readSize) growBy += growSize;
894                         _buffer.length += growBy;
895                     }
896 
897                     /* Read the next block. */
898                     _dataEnd +=
899                         _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
900                         .length;
901 
902                     found = _buffer[_lineEnd .. _dataEnd].find(terminator);
903                     _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
904 
905                 } while (found.empty && !_file.eof);
906             }
907         }
908     }
909 
910     assert(file.isOpen, "bufferedByLine passed a closed file.");
911 
912     auto r = new BufferedByLineImpl(file);
913     r.popFront;
914     return r;
915 }
916 
/* bufferedByLine tests. Each test reads the same data with bufferedByLine and with
 * std.stdio.File.byLine and requires both to produce identical line sequences.
 */
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Reads pathA with bufferedByLine, using the given read and grow sizes, and pathB
     * with File.byLine. Asserts that both produce the same lines and the same number
     * of lines.
     */
    static void assertMatchesByLine(KeepTerminator keep, size_t rSize, size_t gSize)
        (string pathA, string pathB)
    {
        auto bufferedIn = pathA.File().bufferedByLine!(keep, char, '\n', rSize, gSize);
        auto byLineIn = pathB.File().byLine(keep);
        foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     *
     * The data has 1000 empty lines, 1000 single character lines, the numbers 1 to
     * 1000, and lines of 'x' characters with lengths 1 to 1000.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {

        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters. These are written out directly so that the default read
     * and grow sizes of bufferedByLine are the ones being exercised.
     */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    assertMatchesByLine!(No.keepTerminator, 512, 256)(file1a, file1b);

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file1a, file1b);
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file1a, file1b);
        }
    }


    /* Files that do not end in a newline. file1a and file1b are overwritten. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    {
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file1a, file1b);
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file2a, file2b);
            assertMatchesByLine!(No.keepTerminator, readSize, growSize)(file3a, file3b);

            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file1a, file1b);
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file2a, file2b);
            assertMatchesByLine!(Yes.keepTerminator, readSize, growSize)(file3a, file3b);
        }
    }
}
1040 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses joinAppend have been replaced by BufferedOutputRange, which has
its own joinAppend method. However, joinAppend remains useful when constructing internal
buffers where BufferedOutputRange is not appropriate.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Params:
    inputRange = The elements to join. Elements are either single values of type E
                 or arrays of E.
    outputRange = The output range the joined result is appended to. Existing
                  content is not cleared.
    delimiter = The value written between consecutive elements. Nothing is written
                before the first element or after the last.

Returns: The output range, after the elements have been appended.
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* Both alternatives are grouped so the input range requirement applies to each.
     * Without the extra parentheses '&&' binds tighter than '||' and only the first
     * alternative is guarded by isInputRange.
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E)))
    )
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1114 
/* joinAppend tests: arrays of char[] and string joined into a char Appender, and
 * arrays of int[] and int joined into an int Appender.
 */
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* Joining must leave the input array unchanged. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Output is appended to existing content. Check the array that was just joined
     * (svec) for modification, rather than cvec from the previous step.
     */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays: the delimiter goes between the arrays. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of single values: the delimiter goes between the values. No delimiter
     * is written between the output of separate joinAppend calls.
     */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1163 
/**
getTsvFieldValue returns the value of one field of a delimited text line, converted
to type T.

It is a convenience for callers needing just one field from a line. When several
fields are needed it is more efficient to use std.algorithm.splitter or the
InputFieldReordering class directly.

The line is split on the delimiter character, fields before the requested one are
skipped, and the requested field is converted with std.conv.to.

Params:
    line = The input text.
    fieldIndex = Zero-based index of the field to return.
    delim = The field delimiter.

Returns: The field's value converted to type T.

Throws: std.conv.ConvException if the conversion fails. Exception if the line has
too few fields. The message for the latter uses 1-upped field numbers, matching what
command line users enter.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step past the fields preceding the requested one, counting how many exist. */
    size_t numSkipped = 0;
    for (; numSkipped < fieldIndex && !fields.empty; ++numSkipped) fields.popFront;

    /* Normal case: the requested field is present. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero was requested, but the split range is empty. This is a workaround to
     * a splitter special case - If the input is empty, the returned split range is
     * empty. This doesn't properly represent a single column file. More correct
     * mathematically, and for this case, would be a single value representing an
     * empty string. The input line is a convenient source of an empty line. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1226 
/* getTsvFieldValue tests: conversions to numeric and string types, empty fields,
 * input lines of each character width, conversion failures, and lines having too
 * few fields.
 */
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    /* NOTE(review): the next assertion appears to duplicate the previous one. */
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. An empty line is treated as a single empty field. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. The same line is used as a string, char[],
     * dchar[], and wchar[].
     */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1290 
1291 /**
1292 Field-lists - A field-list is a string entered on the command line identifying one or more
1293 field numbers. They are used by the majority of the tsv utility applications. There are
1294 two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
1295 will use makeFieldListOptionHandler, it creates a delegate that can be passed to
1296 std.getopt to process the command option. Actual processing of the option text is done by
1297 parseFieldList. It can be called directly when the text of the option value contains more
1298 than just the field number.
1299 
1300 Syntax and behavior:
1301 
1302 A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1303 1-upped integers representing locations in an input line, in the traditional meaning of
1304 Unix command line tools. Fields can be entered as single numbers or a range. Multiple
1305 entries are separated by commas. Some examples (with 'fields' as the command line option):
1306 
1307    --fields 3                 // Single field
1308    --fields 4,1               // Two fields
1309    --fields 3-9               // A range, fields 3 to 9 inclusive
1310    --fields 1,2,7-34,11       // A mix of ranges and fields
1311    --fields 15-5,3-1          // Two ranges in reverse order.
1312 
1313 Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
1314 value by default, but can be enabled to support the notion of zero as representing the
1315 entire line. However, zero cannot be part of a range. Field numbers are one-based by
1316 default, but can be converted to zero-based. If conversion to zero-based is enabled, field
1317 number zero must be disallowed or a signed integer type specified for the returned range.
1318 
1319 An error is thrown if an invalid field specification is encountered. Error text is
1320 intended for display. Error conditions include:
1321   - Empty fields list
  - Empty value, e.g. two consecutive commas, a trailing comma, or a leading comma
1323   - String that does not parse as a valid integer
1324   - Negative integers, or zero if zero is disallowed.
1325   - An incomplete range
1326   - Zero used as part of a range.
1327 
1328 No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
1329 field numbers can be entered as well. Additional restrictions need to be applied by the
1330 caller.
1331 
1332 Notes:
1333   - The data type determines the max field number that can be entered. Enabling conversion
1334     to zero restricts to the signed version of the data type.
1335   - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
1336     allowFieldNumZero template parameters.
1337 */
1338 
/** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
 *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
 */
alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";

/** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
 *  used by makeFieldListOptionHandler and parseFieldList.
 */
alias AllowFieldNumZero = Flag!"allowFieldNumZero";

/** The delegate type returned by makeFieldListOptionHandler. The delegate takes the
 *  option name and the option's value text.
 */
alias OptionHandlerDelegate = void delegate(string option, string value);
1350 
/**
makeFieldListOptionHandler builds a delegate that can be passed to std.getopt as the
handler for a field-list command line option. Field-list syntax is the one accepted
by parseFieldList.

Each time the delegate is invoked, the option's value is parsed with parseFieldList
and the resulting field numbers are appended to fieldsArray. If parsing throws, the
exception message is prefixed with the option name, in the form "[--option] ", and
the exception is rethrown.

Params:
    T = Integral type of the field numbers.
    convertToZero = Passed through to parseFieldList.
    allowZero = Passed through to parseFieldList.
    fieldsArray = The array that parsed field numbers are appended to.

Returns: The option handler delegate.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
    T,
    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    /* Parses optionValue as a field-list and appends each field number to target. */
    void appendParsedFields(ref T[] target, string optionName, string optionValue) pure @safe
    {
        try
        {
            foreach (fieldNum; optionValue.parseFieldList!(T, convertToZero, allowZero))
            {
                target ~= fieldNum;
            }
        }
        catch (Exception e)
        {
            import std.format : format;

            /* Identify the offending option in the message, then propagate. */
            e.msg = format("[--%s] %s", optionName, e.msg);
            throw e;
        }
    }

    return (option, value) => appendParsedFields(fieldsArray, option, value);
}
1376 
/* makeFieldListOptionHandler tests. Each case runs std.getopt on a command line and
 * checks the field numbers collected by the generated option handler.
 */
unittest
{
    import std.exception : assertThrown, assertNotThrown;
    import std.getopt;

    /* Default template parameters: one-based field numbers, zero disallowed. Values
     * from repeated uses of the option are accumulated.
     */
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    /* Conversion to zero-based indices. */
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    /* Zero allowed as a field number. */
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    /* Zero mixed with other field numbers. */
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }
    /* A field-list option used alongside a built-in getopt array option. Note that
     * this sets the global std.getopt.arraySep, which remains set for the cases that
     * follow.
     */
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];
        std.getopt.arraySep = ",";
        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    {
        uint[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }
    {
        ushort[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }

    /* Basic cases involving signed types. With a signed type, zero-based conversion
     * can be combined with allowing field zero, which is then returned as -1.
     */
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        long[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        long[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        int[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        int[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        short[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        short[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [-1]);
    }

    {
        /* Error cases. */
        size_t[] fields;

        /* Zero when zero is not allowed. */
        auto args = ["program", "-f", "0"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number. */
        args = ["program", "-f", "-1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Another option where the option value is expected. */
        args = ["program", "-f", "--fields", "1"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Non-numeric value. */
        args = ["program", "-f", "a"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Non-integer value. */
        args = ["program", "-f", "1.5"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Incomplete range. */
        args = ["program", "-f", "2-"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number inside a list. */
        args = ["program", "-f", "3,5,-7"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Trailing comma. */
        args = ["program", "-f", "3,5,"];
        assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));

        /* Negative number when zero is allowed. */
        args = ["program", "-f", "-1"];
        assertThrown(getopt(args,
                            "f|fields", fields.makeFieldListOptionHandler!(
                                size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
    }
}
1532 
1533 /**
1534 parseFieldList lazily generates a range of fields numbers from a 'field-list' string.
1535 */
1536 auto parseFieldList(T = size_t,
1537                     ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
1538                     AllowFieldNumZero allowZero = No.allowFieldNumZero)
1539     (string fieldList, char delim = ',')
1540 if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
1541 {
1542     import std.algorithm : splitter;
1543 
1544     auto _splitFieldList = fieldList.splitter(delim);
1545     auto _currFieldParse =
1546         (_splitFieldList.empty ? "" : _splitFieldList.front)
1547         .parseFieldRange!(T, convertToZero, allowZero);
1548 
1549     if (!_splitFieldList.empty) _splitFieldList.popFront;
1550 
1551     struct Result
1552     {
1553         @property bool empty() pure nothrow @safe @nogc
1554         {
1555             return _currFieldParse.empty;
1556         }
1557 
1558         @property T front() pure @safe
1559         {
1560             import std.conv : to;
1561 
1562             assert(!empty, "Attempting to fetch the front of an empty field-list.");
1563             assert(!_currFieldParse.empty, "Internal error. Call to front with an empty _currFieldParse.");
1564 
1565             return _currFieldParse.front.to!T;
1566         }
1567 
1568         void popFront() pure @safe
1569         {
1570             assert(!empty, "Attempting to popFront an empty field-list.");
1571 
1572             _currFieldParse.popFront;
1573             if (_currFieldParse.empty && !_splitFieldList.empty)
1574             {
1575                 _currFieldParse = _splitFieldList.front.parseFieldRange!(T, convertToZero, allowZero);
1576                 _splitFieldList.popFront;
1577             }
1578         }
1579     }
1580 
1581     return Result();
1582 }
1583 
@safe unittest
{
    /* Tests for parseFieldList: expansion and ordering of entries, alternate result
     * types, zero-based conversion, field number zero handling, and invalid input.
     */
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic tests. Entries are expanded in the order listed; duplicates and descending
     * ranges are preserved.
     */
    assert("1".parseFieldList.equal([1]));
    assert("1,2".parseFieldList.equal([1, 2]));
    assert("1,2,3".parseFieldList.equal([1, 2, 3]));
    assert("1-2".parseFieldList.equal([1, 2]));
    assert("1-2,6-4".parseFieldList.equal([1, 2, 6, 5, 4]));
    assert("1-2,1,1-2,2,2-1".parseFieldList.equal([1, 2, 1, 1, 2, 2, 2, 1]));
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    assert("1".parseFieldList!int.equal([1]));
    assert("1,2,3".parseFieldList!int.equal([1, 2, 3]));
    assert("1-2".parseFieldList!int.equal([1, 2]));
    assert("1-2,6-4".parseFieldList!int.equal([1, 2, 6, 5, 4]));
    assert("1-2,5".parseFieldList!int.equal([1, 2, 5]));

    /* Convert to zero tests. Every field number is reduced by one. */
    assert("1".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    assert("1".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0]));
    assert("1,2,3".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 2]));
    assert("1-2".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1]));
    assert("1-2,6-4".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 5, 4, 3]));
    assert("1-2,5".parseFieldList!(long, Yes.convertToZeroBasedIndex).equal([0, 1, 4]));

    /* Allow zero tests. When combined with zero-based conversion, field zero becomes -1,
     * which is why those cases use a signed type.
     */
    assert("0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("1,0,3".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 0, 3]));
    assert("1-2,5".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([1, 2, 5]));
    assert("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, -1, 2]));
    assert("1-2,5".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0, 1, 4]));

    /* Error cases. Only the first entry is parsed when parseFieldList is called; later
     * entries are parsed as the range is advanced. 'each' walks the entire range so
     * that an invalid entry anywhere in the list is reached.
     */
    assertThrown("".parseFieldList.each);
    assertThrown(" ".parseFieldList.each);
    assertThrown(",".parseFieldList.each);
    assertThrown("5 6".parseFieldList.each);
    assertThrown(",7".parseFieldList.each);
    assertThrown("8,".parseFieldList.each);
    assertThrown("8,9,".parseFieldList.each);
    assertThrown("10,,11".parseFieldList.each);
    assertThrown("".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2-3,".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("2-,4".parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    assertThrown("1,2,3,,4".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown(",7".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("10,0,,11".parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("8,9,".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);

    /* Field number zero is rejected unless allowFieldNumZero is set. */
    assertThrown("0".parseFieldList.each);
    assertThrown("1,0,3".parseFieldList.each);
    assertThrown("0".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    assertThrown("1,0,3".parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);

    /* Zero in a range. These throw while parsing the first entry, '0-2', which starts
     * with zero; the second entry, '6-0', is never reached.
     */
    assertThrown("0-2,6-0".parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}
1655 
/* parseFieldRange parses a single field-list entry: either one field number ('5') or a
 * range of field numbers ('5-8', or '8-5' for descending order). These are the values
 * in a field-list separated by a comma or other delimiter. It returns a range that
 * iterates over all the field numbers in the entry, in the order implied by the entry.
 *
 * Template parameters:
 *   T             - Integral type the caller wants field numbers in.
 *   convertToZero - Yes.convertToZeroBasedIndex subtracts one from every field number.
 *                   Parsing is then done in the signed counterpart of T, so the largest
 *                   accepted field number is Signed!T.max.
 *   allowZero     - Yes.allowFieldNumZero accepts zero, but only as a stand-alone field
 *                   number. Zero is rejected at either end of a range.
 *
 * Throws an Exception if the entry is empty, is an incomplete range ('5-', '-5'),
 * contains a field number outside the permitted values, or uses zero as a range
 * endpoint. Text that cannot be converted to the integral type is rejected by
 * std.conv.to, which also throws.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    auto rangeSplit = findSplit(fieldRange, "-");

    /* A dash was found, so the entry is a range rather than a single field number. */
    immutable bool isRange = !rangeSplit[1].empty;

    if (isRange && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = isRange ? rangeSplit[2].to!S : start;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero is only meaningful as a stand-alone field number. Both endpoints must be
         * checked. If only the start were checked, a descending range such as '6-0'
         * would pass validation. For signed types it would then include field zero; for
         * size_t without zero-based conversion 'last + increment' would wrap around and
         * iota would produce an empty range.
         */
        if (isRange && (start == 0 || last == 0))
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }

        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* iota excludes its end value, so step one position past 'last'. */
    return iota(start, last + increment, increment);
}
1725 
@safe unittest // parseFieldRange
{
    /* Tests for parseFieldRange: single field numbers, ascending and descending ranges,
     * zero-based conversion, field number zero handling, alternate integral types,
     * values at the type limits, and invalid input.
     */
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* Basic cases */
    assert(parseFieldRange("1").equal([1]));
    assert("2".parseFieldRange.equal([2]));
    assert("3-4".parseFieldRange.equal([3, 4]));
    assert("3-5".parseFieldRange.equal([3, 4, 5]));
    assert("4-3".parseFieldRange.equal([4, 3]));
    assert("10-1".parseFieldRange.equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Convert to zero-based indices */
    assert(parseFieldRange!(size_t, Yes.convertToZeroBasedIndex)("1").equal([0]));
    assert("2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([1]));
    assert("3-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3]));
    assert("3-5".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([2, 3, 4]));
    assert("4-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([3, 2]));
    assert("10-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]));

    /* Allow zero. */
    assert("0".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert(parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([1]));
    assert("3-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([3, 4]));
    assert("10-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Allow zero, convert to zero-based index. Field zero becomes -1. */
    assert("0".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert(parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("1").equal([0]));
    assert("3-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([2, 3]));
    assert("10-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([9, 8, 7, 6, 5, 4, 3, 2, 1, 0]));

    /* Alternate integer types. */
    assert("2".parseFieldRange!uint.equal([2]));
    assert("3-5".parseFieldRange!uint.equal([3, 4, 5]));
    assert("10-1".parseFieldRange!uint.equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));
    assert("2".parseFieldRange!int.equal([2]));
    assert("3-5".parseFieldRange!int.equal([3, 4, 5]));
    assert("10-1".parseFieldRange!int.equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));
    assert("2".parseFieldRange!ushort.equal([2]));
    assert("3-5".parseFieldRange!ushort.equal([3, 4, 5]));
    assert("10-1".parseFieldRange!ushort.equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));
    assert("2".parseFieldRange!short.equal([2]));
    assert("3-5".parseFieldRange!short.equal([3, 4, 5]));
    assert("10-1".parseFieldRange!short.equal([10,  9, 8, 7, 6, 5, 4, 3, 2, 1]));

    /* Stand-alone zero with alternate integer types. */
    assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));

    /* Max field value cases. */
    assert("65535".parseFieldRange!ushort.equal([65535]));   // ushort max
    assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535]));
    assert("32767".parseFieldRange!short.equal([32767]));    // short max
    assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767]));
    assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766]));

    /* Error cases: empty or blank text, incomplete ranges, negative numbers, zero, and
     * text that is not a plain decimal integer.
     */
    assertThrown("".parseFieldRange);
    assertThrown(" ".parseFieldRange);
    assertThrown("-".parseFieldRange);
    assertThrown(" -".parseFieldRange);
    assertThrown("- ".parseFieldRange);
    assertThrown("1-".parseFieldRange);
    assertThrown("-2".parseFieldRange);
    assertThrown("-1".parseFieldRange);
    assertThrown("1.0".parseFieldRange);
    assertThrown("0".parseFieldRange);
    assertThrown("0-3".parseFieldRange);
    assertThrown("-2-4".parseFieldRange);
    assertThrown("2--4".parseFieldRange);
    assertThrown("2-".parseFieldRange);
    assertThrown("a".parseFieldRange);
    assertThrown("0x3".parseFieldRange);
    assertThrown("3U".parseFieldRange);
    assertThrown("1_000".parseFieldRange);
    assertThrown(".".parseFieldRange);

    /* Error cases with zero-based conversion. */
    assertThrown("".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown(" ".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("1-".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("-2".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("-1".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("0".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("0-3".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("-2-4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    assertThrown("2--4".parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));

    /* Error cases with field number zero allowed. A range starting at zero ('0-3') is
     * still rejected.
     */
    assertThrown("".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown(" ".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("1-".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-2".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-1".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("0-3".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-2-4".parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));

    /* Error cases with both zero-based conversion and field number zero allowed. */
    assertThrown("".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown(" ".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("1-".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-2".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-1".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("0-3".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    assertThrown("-2-4".parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));

    /* Value out of range cases. */
    assertThrown("65536".parseFieldRange!ushort);   // One more than ushort max.
    assertThrown("65535-65536".parseFieldRange!ushort);
    assertThrown("32768".parseFieldRange!short);    // One more than short max.
    assertThrown("32765-32768".parseFieldRange!short);
    // Convert to zero limits signed range: with zero-based conversion the text is
    // parsed as the signed counterpart of T (short for ushort), so 32767 is the
    // largest accepted field number.
    assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex));
    assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766]));
}
1847 
/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
 *  A Yes value indicates the Unix newline was already removed, as might be done via
 *  std.stdio.File.byLine or a similar mechanism. A No value indicates the line still
 *  carries its terminating newline, in which case a Windows line ending appears as
 *  a trailing "\r\n".
 */
alias NewlineWasRemoved = Flag!"newlineWasRemoved";
1853 
1854 /**
1855 throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS
1856 line ending is found on a build compiled for a Unix platform. This is used by
1857 the TSV Utilities to detect Window/DOS line endings and terminate processing
1858 with an error message to the user.
1859  */
1860 void throwIfWindowsNewlineOnUnix
1861     (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
1862     (const char[] line, const char[] filename, size_t lineNum)
1863 {
1864     version(Posix)
1865     {
1866         static if (nlWasRemoved)
1867         {
1868             immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r';
1869         }
1870         else
1871         {
1872             immutable bool hasWindowsLineEnding =
1873                 line.length > 1 &&
1874                 line[$ - 2] == '\r' &&
1875                 line[$ - 1] == '\n';
1876         }
1877 
1878         if (hasWindowsLineEnding)
1879         {
1880             import std.format;
1881             throw new Exception(
1882                 format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
1883                        (filename == "-") ? "Standard Input" : filename, lineNum));
1884         }
1885     }
1886 }
1887 
@safe unittest
{
    /* Note: Currently only building on Posix. Need to add non-Posix test cases
     * if Windows builds are ever done.
     */
    version(Posix)
    {
        import std.algorithm : endsWith;
        import std.exception;

        /* Line contents without any line terminator. */
        immutable string[] bodies = ["", "a", "ab", "abc"];

        /* Newline already removed (the default): only a trailing '\r' is an error. */
        foreach (i, text; bodies)
        {
            assertNotThrown(throwIfWindowsNewlineOnUnix(text, "afile.tsv", i + 1));
            assertThrown(throwIfWindowsNewlineOnUnix(text ~ "\r", "afile.tsv", i + 1));
        }

        /* Newline still present: "\n" alone is fine, "\r\n" is an error. */
        foreach (i, text; bodies)
        {
            assertNotThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text ~ "\n", "afile.tsv", i + 1));
            assertThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text ~ "\r\n", "afile.tsv", i + 5));
        }

        /* Standard Input formatting. A file name of "-" must be reported as
         * "Standard Input" in the error message, for both template variants.
         */
        enum expectedTail = "File: Standard Input, Line: 99";

        auto strippedError = collectException(throwIfWindowsNewlineOnUnix("\r", "-", 99));
        assert(strippedError !is null);
        assert(strippedError.msg.endsWith(expectedTail));

        auto unstrippedError =
            collectException(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99));
        assert(unstrippedError !is null);
        assert(unstrippedError.msg.endsWith(expectedTail));
    }
}