tsv_utils.common.utils source code

1 /**
Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
and several others.
4 
5 Utilities in this file:
6 $(LIST
    * [InputFieldReordering] - A class that creates a reordered subset of fields from
      an input line. Fields in the subset are accessed by array indices. This is
      especially useful when processing the subset in a specific order, such as the
      order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [bufferedByLine] - An input range that reads from a File handle line by line.
16       It is similar to the standard library method std.stdio.File.byLine, but quite a
17       bit faster. This is achieved by reading in larger blocks and buffering.
18 
19     * [joinAppend] - A function that performs a join, but appending the join output to
20       an output stream. It is a performance improvement over using join or joiner with
21       writeln.
22 
23     * [getTsvFieldValue] - A convenience function when only a single value is needed from
24       an input line.
25 
26     * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
27       parsing field-lists entered on the command line.
28 
    * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds for detecting
      Windows newlines in input.
31 )
32 
33 Copyright (c) 2015-2019, eBay Software Foundation
34 Initially written by Jon Degenhardt
35 
License: Boost License 1.0 (http://boost.org/LICENSE_1_0.txt)
37 */
38 
39 module tsv_utils.common.utils;
40 
41 import std.range;
42 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
43 import std.typecons : Flag, No, Yes;
44 
// InputFieldReordering class.
46 
/** Flag used by the InputFieldReordering template. */
alias EnablePartialLines = Flag!"enablePartialLines";

/**
InputFieldReordering - Move select fields from an input line to an output array,
reordering along the way.

The InputFieldReordering class is used to reorder a subset of fields from an input line.
The caller instantiates an InputFieldReordering object at the start of input processing.
The instance contains a mapping from input index to output index, plus a buffer holding
the reordered fields. The caller processes each input line by calling initNewLine,
splitting the line into fields, and calling processNextField on each field. The output
buffer is ready when the allFieldsFilled method returns true.

Fields are not copied, instead the output buffer points to the fields passed by the caller.
The caller needs to use or copy the output buffer while the fields are still valid, which
is normally until reading the next input line. The program below illustrates the basic use
case. It reads stdin and outputs fields [3, 0, 2], in that order. (See also joinAppend,
below, which has a performance improvement over join used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based. An individual field can be listed multiple times. The
outputFields array is not valid until all the specified fields have been processed. The
allFieldsFilled method tests this. If a line does not have enough fields the outputFields
buffer cannot be used. For most TSV applications this is okay, as it means the line is
invalid and cannot be used. However, if partial lines are okay, the template can be
instantiated with EnablePartialLines.yes. This will ensure that any fields not filled-in
are empty strings in the outputFields return.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The class works by creating an array of tuples mapping the input
     * field index to the location in the outputFields array. The 'fromToMap' array is
     * sorted in input field order, enabling placement in the outputFields buffer during a
     * pass over the input fields. The map is created by the constructor. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * During processing of a line, an array slice, mapStack, is used to track how
     * much of the fromToMap remains to be processed.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;
    private TupleFromTo[] fromToMap;
    private TupleFromTo[] mapStack;

    /** Builds the input-to-output mapping.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = Output slot assigned to the first listed field; later fields take the
     *           following slots. Slots below `start` are never written and stay empty.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        /* Output slots are numbered start .. start + length, so the buffer has to
         * cover the offset as well. Sizing it by length alone leaves the last
         * 'start' slots out of bounds.
         */
        outputFieldsBuf = new C[][](start + inputFieldIndicies.length);
        fromToMap.reserve(inputFieldIndicies.length);

        foreach (to, from; inputFieldIndicies.enumerate(start))
        {
            fromToMap ~= TupleFromTo(from, to);
        }

        /* Sorted by input index (then output slot) so a single pass over the input
         * fields can consume the map front to back.
         */
        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine initializes the object for a new line. */
    final void initNewLine() pure nothrow @safe
    {
        mapStack = fromToMap;
        static if (partialLinesOk)
        {
            /* Partial lines: clear stale values so unfilled slots read as empty. */
            import std.algorithm : each;
            outputFieldsBuf.each!((ref s) => s.length = 0);
        }
    }

    /** processNextField maps an input field to the correct locations in the outputFields
     * array. It should be called once for each field on the line, in the order found.
     *
     * Returns: The number of output slots filled by this field.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t numFilled = 0;
        while (!mapStack.empty && fieldIndex == mapStack.front.from)
        {
            outputFieldsBuf[mapStack.front.to] = fieldValue;
            mapStack.popFront;
            numFilled++;
        }
        return numFilled;
    }

    /** allFieldsFilled returns true if all fields expected have been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.empty;
    }

    /** outputFields is the assembled output fields. Unless partial lines are enabled,
     * it is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf[];
    }
}
182 
/* Tests using different character types. The same scenario is run once per
 * character type supported by InputFieldReordering.
 */
unittest
{
    import std.conv : to;
    import std.meta : AliasSeq;

    auto inputLines = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] fields_2_0 = [2, 0];

    auto expected_2_0 = [["r1f2",   "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456",    "r3f0"]];

    /* Compile-time unrolled loop: one copy of the body per character type. */
    foreach (C; AliasSeq!(char, wchar, dchar))
    {
        auto expected = to!(C[][][])(expected_2_0);
        auto ifr = new InputFieldReordering!C(fields_2_0);

        foreach (lineIndex, line; inputLines)
        {
            ifr.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                ifr.processNextField(fieldIndex, to!(C[])(fieldValue));

                /* Field 2 is the last one needed, so filling completes there. */
                assert((fieldIndex >= 2) == ifr.allFieldsFilled);
            }

            assert(ifr.allFieldsFilled);
            assert(ifr.outputFields == expected[lineIndex]);
        }
    }
}
232 
/* Test of partial line support: the output buffer is checked after every field,
 * including while some output slots are still unfilled.
 */
unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] fields_2_0 = [2, 0];

    /* Expected output buffer contents, indexed by [line][field just processed]. */
    auto expectedStates = to!(char[][][][])(
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ]);

    auto ifr = new InputFieldReordering!(char, EnablePartialLines.yes)(fields_2_0);

    foreach (lineIndex, line; inputLines)
    {
        ifr.initNewLine;

        foreach (fieldIndex, fieldValue; line)
        {
            auto field = to!(char[])(fieldValue);
            ifr.processNextField(fieldIndex, field);
            assert(ifr.outputFields == expectedStates[lineIndex][fieldIndex]);
        }
    }
}
266 
/* Field combination tests. Each scenario pairs a field list with the output expected
 * for every input line.
 */
unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    static struct Scenario
    {
        size_t[] fields;        // Field indices handed to InputFieldReordering.
        string[][] expected;    // Expected outputFields, one entry per input line.
    }

    auto scenarios =
        [
            Scenario([0],
                     [["00"],
                      ["10"],
                      ["20"]]),
            Scenario([3],
                     [["03"],
                      ["13"],
                      ["23"]]),
            Scenario([0, 1],
                     [["00", "01"],
                      ["10", "11"],
                      ["20", "21"]]),
            Scenario([1, 0],
                     [["01", "00"],
                      ["11", "10"],
                      ["21", "20"]]),
            Scenario([0, 3],
                     [["00", "03"],
                      ["10", "13"],
                      ["20", "23"]]),
            Scenario([3, 0],
                     [["03", "00"],
                      ["13", "10"],
                      ["23", "20"]]),
            Scenario([0, 1, 2, 3],
                     [["00", "01", "02", "03"],
                      ["10", "11", "12", "13"],
                      ["20", "21", "22", "23"]]),
            Scenario([3, 2, 1, 0],
                     [["03", "02", "01", "00"],
                      ["13", "12", "11", "10"],
                      ["23", "22", "21", "20"]]),
            Scenario([0, 3, 0, 0, 1],
                     [["00", "03", "00", "00", "01"],
                      ["10", "13", "10", "10", "11"],
                      ["20", "23", "20", "20", "21"]]),
        ];

    foreach (ref scenario; scenarios)
    {
        auto expected = to!(char[][][])(scenario.expected);

        /* One reordering object per scenario, reused across all input lines. */
        auto ifr = new InputFieldReordering!char(scenario.fields);

        foreach (lineIndex, line; inputLines)
        {
            ifr.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                ifr.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            assert(ifr.outputFields == expected[lineIndex]);
        }
    }
}
369 
370 
371 import std.stdio : File, isFileHandle, KeepTerminator;
372 import std.range : isOutputRange;
373 import std.traits : Unqual;
374 
/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.

BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered writer over outputTarget.
     *
     * Params:
     *   outputTarget = The File or output range that receives flushed data.
     *   flushSize = Buffer length at which newline-terminated writes trigger a flush.
     *   reserveSize = Capacity reserved in the internal buffer up front.
     *   maxSize = Buffer length at which any write triggers a flush. Expected to be
     *             at least flushSize; raised to flushSize if it is not.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /* Any data still buffered is written out when the object is destroyed. */
    ~this()
    {
        flush();
    }

    /** Writes the internal buffer to the output target and empties the buffer. */
    void flush()
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer has reached flushSize. Returns true if a flush occurred. */
    bool flushIfFull()
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize()
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. The explicit check matters when flushSize is
     * zero: the length test then passes on an empty buffer, and inspecting the last
     * character would index an empty array.
     */
    private bool maybeFlush()
    {
        immutable size_t bufferedLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferedLength > 0 &&
            bufferedLength >= _flushSize &&
            (_outputBuffer.data[$-1] == '\n' || bufferedLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }

    /* Appends to the internal buffer without any flush check. */
    private void appendRaw(T)(T stuff)
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer, then flushes if the buffer ends in a newline
     * and has reached flushSize, or if it has reached maxSize.
     */
    void append(T)(T stuff)
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     *
     * Returns: true if the buffer was flushed.
     */
    bool appendln()
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if flushSize has been reached.
     *
     * Returns: true if the buffer was flushed.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range. A lone newline, as a character or a string, is
     * treated as an end-of-line and goes through appendln.
     */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
566 
/* BufferedOutputRange tests: file, locking text writer, and Appender targets, flush
 * boundaries, joinAppend, output range usage, and the maxSize safety flush.
 */
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.algorithm : joiner, map;
    import std.array : appender;
    import std.conv : to;
    import std.file : readText, rmdirRecurse;
    import std.path : buildPath;
    import std.range : iota;
    import std.stdio : File;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    /* Everything writeSample emits after its label. */
    enum sampleTail = "abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n";

    /* Writes the label followed by the standard sample content. Takes the stream by
     * ref so no copy is made, as destroying a copy would trigger a flush.
     */
    void writeSample(Stream)(ref Stream ostream, string label)
    {
        ostream.append(label);
        ostream.append("abc");
        ostream.append(["def", "ghi", "jkl"]);
        ostream.appendln("100");
        ostream.append(iota(0, 10).map!(x => x.to!string).joiner(" "));
        ostream.appendln();
    }

    /* Basic test. Note that exiting the scope triggers flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        auto ostream = BufferedOutputRange!File(filepath1.File("w"));
        writeSample(ostream, "file1: ");
    }
    assert(filepath1.readText == "file1: " ~ sampleTail);

    /* Test with no reserve and no flush at every line. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        auto ostream = BufferedOutputRange!File(filepath2.File("w"), 0, 0);
        writeSample(ostream, "file2: ");
    }
    assert(filepath2.readText == "file2: " ~ sampleTail);

    /* With a locking text writer. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            auto ltw = filepath3.File("w").lockingTextWriter;
            {
                auto ostream = BufferedOutputRange!(typeof(ltw))(ltw);
                writeSample(ostream, "file3: ");
            }
        }
        assert(filepath3.readText == "file3: " ~ sampleTail);
    }

    /* With an Appender. */
    auto app1 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1))(app1);
        writeSample(ostream, "appender1: ");
    }
    assert(app1.data == "appender1: " ~ sampleTail);

    /* With an Appender, but checking flush boundaries. */
    auto app2 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush if 10+
        assert(app2.data == "");

        ostream.append("12345678"); // Not flushed yet.
        assert(app2.data == "");

        bool flushedOnNinth = ostream.appendln;  // Ninth char, not flushed yet.
        assert(!flushedOnNinth);
        assert(app2.data == "");

        bool flushedOnTenth = ostream.appendln;  // Tenth char, now flushed.
        assert(flushedOnTenth);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        ostream.append("12345678");

        bool flushedWhenNotFull = ostream.flushIfFull;
        assert(!flushedWhenNotFull);
        assert(app2.data == "");

        ostream.flush;
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        /* Past flushSize, but no trailing newline and below maxSize: stays buffered. */
        ostream.append("123456789012345");
        assert(app2.data == "");
    }
    assert(app2.data == "123456789012345");

    /* Using joinAppend. */
    auto app1b = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app1b))(app1b);
        ostream.append("appenderB: ");
        ostream.joinAppend(["a", "bc", "def"], '-');
        ostream.append(':');
        ostream.joinAppend(["g", "hi", "jkl"], '-');
        ostream.appendln("*100*");
        ostream.joinAppend(iota(0, 6).map!(x => x.to!string), ' ');
        ostream.append(' ');
        ostream.joinAppend(iota(6, 10).map!(x => x.to!string), " ");
        ostream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Operating as an output range. When passed to a function as a ref, exiting
     * the function does not flush. When passed as a value, it gets flushed when
     * the function returns. Also test both UFCS and non-UFCS styles.
     */

    void outputStuffAsRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(x => x.to!string));
        put(range, "\n");
    }

    void outputStuffAsVal(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(x => x.to!string));
        range.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        outputStuffAsRef(ostream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        outputStuffAsRef(ostream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        outputStuffAsVal(ostream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* Test maxSize. */
    auto app5 = appender!(char[]);
    {
        auto ostream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        ostream.append("1234567");  // Not flushed yet (no newline).
        assert(app5.data == "");

        ostream.append("89012");    // Flushed by maxSize
        assert(app5.data == "123456789012");

        ostream.put("1234567");     // Not flushed yet (no newline).
        assert(app5.data == "123456789012");

        ostream.put("89012");       // Flushed by maxSize
        assert(app5.data == "123456789012123456789012");

        ostream.joinAppend(["ab", "cd"], '-');        // Not flushed yet
        ostream.joinAppend(["de", "gh", "ij"], '-');  // Flushed by maxSize
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
774 
775 /**
776 bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
777 reading a large buffer from the input stream rather than just a single line.
778 
The file argument needs to be a File object open for reading, typically a filesystem
file or standard input. Use the Yes.keepTerminator template parameter to keep the
newline. This is similar to stdio.File.byLine, except specified as a template parameter
rather than a runtime parameter.
783 
784 Reading in blocks does mean that input is not read until a full buffer is available or
785 end-of-file is reached. For this reason, bufferedByLine is not appropriate for
786 interactive input.
787 */
788 
789 auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
790                     ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
791     (File file)
792 if (is(Char == char) || is(Char == ubyte))
793 {
794     static assert(0 < growSize && growSize <= readSize);
795 
796     static final class BufferedByLineImpl
797     {
798         /* Buffer state variables
799          *   - _buffer.length - Full length of allocated buffer.
800          *   - _dataEnd - End of currently valid data (end of last read).
801          *   - _lineStart - Start of current line.
802          *   - _lineEnd - End of current line.
803          */
804         private File _file;
805         private ubyte[] _buffer;
806         private size_t _lineStart = 0;
807         private size_t _lineEnd = 0;
808         private size_t _dataEnd = 0;
809 
810         this (File f)
811         {
812             _file = f;
813             _buffer = new ubyte[readSize + growSize];
814         }
815 
816         bool empty() const
817         {
818             return _file.eof && _lineStart == _dataEnd;
819         }
820 
821         Char[] front()
822         {
823             assert(!empty, "Attempt to take the front of an empty bufferedByLine.");
824 
825             static if (keepTerminator == Yes.keepTerminator)
826             {
827                 return cast(Char[]) _buffer[_lineStart .. _lineEnd];
828             }
829             else
830             {
831                 assert(_lineStart < _lineEnd);
832                 immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
833                 return cast(Char[]) _buffer[_lineStart .. end];
834             }
835         }
836 
837         /* Note: Call popFront at initialization to do the initial read. */
838         void popFront()
839         {
840             import std.algorithm: copy, find;
841             assert(!empty, "Attempt to popFront an empty bufferedByLine.");
842 
843             /* Pop the current line. */
844             _lineStart = _lineEnd;
845 
846             /* Set up the next line if more data is available, either in the buffer or
847              * the file. The next line ends at the next newline, if there is one.
848              *
849              * Notes:
850              * - 'find' returns the slice starting with the character searched for, or
851              *   an empty range if not found.
852              * - _lineEnd is set to _dataEnd both when the current buffer does not have
853              *   a newline and when it ends with one.
854              */
855             auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
856             _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
857 
858             if (found.empty && !_file.eof)
859             {
860                 /* No newline in current buffer. Read from the file until the next
861                  * newline is found.
862                  */
863                 assert(_lineEnd == _dataEnd);
864 
865                 if (_lineStart > 0)
866                 {
867                     /* Move remaining data to the start of the buffer. */
868                     immutable remainingLength = _dataEnd - _lineStart;
869                     copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
870                     _lineStart = 0;
871                     _lineEnd = _dataEnd = remainingLength;
872                 }
873 
874                 do
875                 {
876                     /* Grow the buffer if necessary. */
877                     immutable availableSize = _buffer.length - _dataEnd;
878                     if (availableSize < readSize)
879                     {
880                         size_t growBy = growSize;
881                         while (availableSize + growBy < readSize) growBy += growSize;
882                         _buffer.length += growBy;
883                     }
884 
885                     /* Read the next block. */
886                     _dataEnd +=
887                         _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
888                         .length;
889 
890                     found = _buffer[_lineEnd .. _dataEnd].find(terminator);
891                     _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
892 
893                 } while (found.empty && !_file.eof);
894             }
895         }
896     }
897 
898     assert(file.isOpen, "bufferedByLine passed a closed file.");
899 
900     auto r = new BufferedByLineImpl(file);
901     r.popFront;
902     return r;
903 }
904 
/* bufferedByLine tests. Every test reads one file with bufferedByLine and an identical
 * copy with std.stdio.File.byLine, and requires the two to yield the same lines.
 */
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");

    /* The temporary File objects are closed at the end of each statement. */
    file1a.File("w").write(data1.data);
    file1b.File("w").write(data1.data);

    /* Default parameters. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', 512, 256);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            {
                auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
                auto f1bIn = file1b.File().byLine(No.keepTerminator);
                foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
            }
            {
                auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
                auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
                foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
            }
        }}
    }

    /* Files that do not end in a newline. file1a/file1b are overwritten here. */

    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");

    file1a.File("w").write("a");
    file1b.File("w").write("a");
    file2a.File("w").write("ab");
    file2b.File("w").write("ab");
    file3a.File("w").write("abc");
    file3b.File("w").write("abc");

    /* Each pair is [file read with bufferedByLine, identical file read with byLine]. */
    string[][] filePairs = [[file1a, file1b], [file2a, file2b], [file3a, file3b]];

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            foreach (pair; filePairs)
            {
                {
                    auto bufferedIn = pair[0].File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
                    auto byLineIn = pair[1].File().byLine(No.keepTerminator);
                    foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
                }
                {
                    auto bufferedIn = pair[0].File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
                    auto byLineIn = pair[1].File().byLine(Yes.keepTerminator);
                    foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
                }
            }
        }}
    }
}
1028 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

Note: The main uses of joinAppend have been replaced by BufferedOutputRange, which has
its own joinAppend method.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, '\t').data);
        }
        else
        {
            writeln("Error: Insufficient number of field on the line.");
        }
    }
    return 0;
}
---

Params:
    inputRange  = The elements to join. Elements are either of type E, or arrays of E.
    outputRange = Receives the joined output. Existing content is not cleared; the new
                  output is appended to it.
    delimiter   = Value written between consecutive elements. Nothing is written for
                  an empty inputRange, and no trailing delimiter is written.

Returns: outputRange, allowing calls to be chained.
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    /* The alternatives are parenthesized as a group so that the input range check
     * applies to both of them ('&&' binds tighter than '||').
     */
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E))
    ))
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1101 
/* joinAppend tests: joining arrays of arrays and arrays of elements into Appenders,
 * including appending to an Appender that already holds data.
 */
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));    // The input is left unchanged.

    /* Output is appended to what the appender already contains. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));    // The input is left unchanged.

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays: the delimiter separates the arrays. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of elements: the delimiter separates the individual elements. */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1150 
/**
getTsvFieldValue returns the value of one field from a delimited text string,
converted to the requested type.

This is a convenience function for cases where a single field from an input line is
all that is needed. When several values are needed it is more efficient to work
directly with std.algorithm.splitter or the InputFieldReordering class.

Params:
    line       = The text to split.
    fieldIndex = Zero-based index of the field to extract.
    delim      = The field delimiter.

Returns: The field's text converted to type T with std.conv.to.

Throws: std.conv.ConvException if the conversion fails. An Exception if the line has
too few fields. The message of the latter uses 1-upped field numbers, as would be
provided by command line users.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim) pure @safe
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step over the fields preceding the one requested, counting how many exist. */
    size_t fieldsSkipped = 0;
    for (; fieldsSkipped < fieldIndex && !fields.empty; ++fieldsSkipped)
    {
        fields.popFront;
    }

    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, fieldsSkipped));
    }

    /* Workaround for a splitter special case: splitting an empty input produces an
     * empty range rather than a range holding one empty string. That does not
     * properly represent a single column file. The input line itself is a convenient
     * source of an empty value. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1213 
/* getTsvFieldValue tests: common cases, empty fields, all character widths for the
 * input line, conversion failures, and lines with too few fields.
 */
unittest
{
    import std.conv : ConvException, to;
    import std.exception;
    import std.meta : AliasSeq;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. The same line is converted to each of the
     * supported line types and the same expectations are checked against each.
     */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";

    static foreach (Line; AliasSeq!(string, char[], dchar[], wchar[]))
    {{
        Line typedLine = stringLine.to!Line;

        assert(getTsvFieldValue!string(typedLine, 0, '\t') == "orange and black");
        assert(getTsvFieldValue!string(typedLine, 1, '\t') == "ნარინჯისფერი და შავი");
        assert(getTsvFieldValue!wstring(typedLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
        assert(getTsvFieldValue!double(typedLine, 2, '\t') == 88.5);
    }}

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1277 
1278 /**
1279 Field-lists - A field-list is a string entered on the command line identifying one or more
1280 field numbers. They are used by the majority of the tsv utility applications. There are
1281 two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
1282 will use makeFieldListOptionHandler, it creates a delegate that can be passed to
1283 std.getopt to process the command option. Actual processing of the option text is done by
1284 parseFieldList. It can be called directly when the text of the option value contains more
1285 than just the field number.
1286 
1287 Syntax and behavior:
1288 
1289 A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1290 1-upped integers representing locations in an input line, in the traditional meaning of
1291 Unix command line tools. Fields can be entered as single numbers or a range. Multiple
1292 entries are separated by commas. Some examples (with 'fields' as the command line option):
1293 
1294    --fields 3                 // Single field
1295    --fields 4,1               // Two fields
1296    --fields 3-9               // A range, fields 3 to 9 inclusive
1297    --fields 1,2,7-34,11       // A mix of ranges and fields
1298    --fields 15-5,3-1          // Two ranges in reverse order.
1299 
1300 Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
1301 value by default, but can be enabled to support the notion of zero as representing the
1302 entire line. However, zero cannot be part of a range. Field numbers are one-based by
1303 default, but can be converted to zero-based. If conversion to zero-based is enabled, field
1304 number zero must be disallowed or a signed integer type specified for the returned range.
1305 
1306 An error is thrown if an invalid field specification is encountered. Error text is
1307 intended for display. Error conditions include:
1308   - Empty fields list
1309   - Empty value, e.g. Two consecutive commas, a trailing comma, or a leading comma
1310   - String that does not parse as a valid integer
1311   - Negative integers, or zero if zero is disallowed.
1312   - An incomplete range
1313   - Zero used as part of a range.
1314 
1315 No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
1316 field numbers can be entered as well. Additional restrictions need to be applied by the
1317 caller.
1318 
1319 Notes:
1320   - The data type determines the max field number that can be entered. Enabling conversion
1321     to zero restricts to the signed version of the data type.
1322   - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
1323     allowFieldNumZero template parameters.
1324 */
1325 
1326 /** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
1327  *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
1328  */
1329 alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";
1330 
1331 /** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
1332  *  used by makeFieldListOptionHandler and parseFieldList.
1333  */
1334 alias AllowFieldNumZero = Flag!"allowFieldNumZero";
1335 
1336 alias OptionHandlerDelegate = void delegate(string option, string value); /// Delegate type returned by makeFieldListOptionHandler; receives the option name and its value text.
1337 
/**
makeFieldListOptionHandler creates a std.getopt option handler for processing field
lists entered on the command line. A field list is as defined by parseFieldList.

The returned delegate parses the option value with parseFieldList and appends each
resulting field number to fieldsArray. Values are appended as they are produced, so
fields parsed before an invalid entry is reached remain in fieldsArray. When parsing
throws, the exception message is prefixed with the option name before the exception
is rethrown.

Params:
    fieldsArray = The array that receives the field numbers. It is taken by reference
                  and is appended to each time the delegate is invoked.

Returns: A delegate that can be passed to std.getopt as the handler for an option.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
                                                 T,
                                                 ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                                                 AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    return delegate void(string option, string value)
    {
        try
        {
            foreach (fieldNum; value.parseFieldList!(T, convertToZero, allowZero))
            {
                fieldsArray ~= fieldNum;
            }
        }
        catch (Exception exc)
        {
            import std.format : format;

            /* Identify the offending command line option in the error text. */
            exc.msg = format("[--%s] %s", option, exc.msg);
            throw exc;
        }
    };
}
1363 
/* makeFieldListOptionHandler tests: the handler is passed to std.getopt with a range
 * of integral types and template options, followed by invalid field lists.
 */
unittest
{
    import std.exception : assertThrown;
    import std.getopt;
    import std.meta : AliasSeq;

    /* Basic cases using size_t. */
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }

    /* A field-list option used together with a built-in array option. */
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];

        /* arraySep is a std.getopt module-level setting. Restore it on scope exit so
         * the change does not leak into getopt calls made after this block.
         */
        immutable savedArraySep = std.getopt.arraySep;
        scope(exit) std.getopt.arraySep = savedArraySep;
        std.getopt.arraySep = ",";

        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    static foreach (T; AliasSeq!(uint, ushort))
    {{
        T[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(T, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }}

    /* Basic cases involving signed types. Signed types allow zero to be combined
     * with conversion to zero-based indices, which turns field zero into -1.
     */
    static foreach (T; AliasSeq!(long, int, short))
    {{
        {
            T[] fields;
            auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
            getopt(args, "f|fields", fields.makeFieldListOptionHandler);
            assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
        }
        {
            T[] fields;
            auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
            getopt(args,
                   "f|fields", fields.makeFieldListOptionHandler!(T, Yes.convertToZeroBasedIndex));
            assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
        }
        {
            T[] fields;
            auto args = ["program", "-f", "0"];
            getopt(args,
                   "f|fields", fields.makeFieldListOptionHandler!(T, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
            assert(fields == [-1]);
        }
    }}

    /* Error cases. */
    {
        size_t[] fields;

        string[][] invalidCommandLines = [
            ["program", "-f", "0"],
            ["program", "-f", "-1"],
            ["program", "-f", "--fields", "1"],
            ["program", "-f", "a"],
            ["program", "-f", "1.5"],
            ["program", "-f", "2-"],
            ["program", "-f", "3,5,-7"],
            ["program", "-f", "3,5,"],
        ];

        foreach (commandLine; invalidCommandLines)
        {
            auto args = commandLine.dup;
            assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));
        }

        /* A negative number is still an error when zero is allowed. */
        auto zeroAllowedArgs = ["program", "-f", "-1"];
        assertThrown(getopt(zeroAllowedArgs,
                            "f|fields", fields.makeFieldListOptionHandler!(
                                size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
    }
}
1519 
/**
parseFieldList lazily generates a range of field numbers from a 'field-list' string.

The string is split on delim and each entry is passed to parseFieldRange. The first
entry is passed to parseFieldRange when parseFieldList is called; each later entry
is passed when the range advances onto it. An empty fieldList results in an empty
string being passed to parseFieldRange.

Params:
    fieldList = The field-list text, e.g. "1,2,7-34,11".
    delim     = The character separating entries in the list.

Returns: An input range of field numbers of type T. The range reads its state from
this call's local variables, so copies of the range advance together.
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    /* Entries of the field-list that have not been started yet. */
    auto pendingEntries = fieldList.splitter(delim);

    /* The field numbers of the entry currently being iterated. */
    string firstEntry = pendingEntries.empty ? "" : pendingEntries.front;
    auto activeFields = firstEntry.parseFieldRange!(T, convertToZero, allowZero);

    if (!pendingEntries.empty) pendingEntries.popFront;

    struct Result
    {
        @property bool empty() { return activeFields.empty; }

        @property T front()
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");
            assert(!activeFields.empty, "Internal error. Call to front with an empty _currFieldParse.");

            return activeFields.front.to!T;
        }

        void popFront()
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            activeFields.popFront;

            /* Stay on the current entry while it has values. Stop for good once the
             * list has no further entries.
             */
            if (!activeFields.empty || pendingEntries.empty) return;

            /* Move on to the next entry of the list. */
            activeFields = pendingEntries.front.parseFieldRange!(T, convertToZero, allowZero);
            pendingEntries.popFront;
        }
    }

    return Result();
}
1567 
unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A field-list paired with the field numbers it is expected to expand to. */
    static struct Expansion
    {
        string fieldList;
        int[] expected;
    }

    /* Basic tests. */
    foreach (tc; [Expansion("1", [1]),
                  Expansion("1,2", [1, 2]),
                  Expansion("1,2,3", [1, 2, 3]),
                  Expansion("1-2", [1, 2]),
                  Expansion("1-2,6-4", [1, 2, 6, 5, 4]),
                  Expansion("1-2,1,1-2,2,2-1", [1, 2, 1, 1, 2, 2, 2, 1])])
    {
        assert(tc.fieldList.parseFieldList.equal(tc.expected));
    }
    assert("1-2,5".parseFieldList!size_t.equal([1, 2, 5]));

    /* Signed Int tests */
    foreach (tc; [Expansion("1", [1]),
                  Expansion("1,2,3", [1, 2, 3]),
                  Expansion("1-2", [1, 2]),
                  Expansion("1-2,6-4", [1, 2, 6, 5, 4]),
                  Expansion("1-2,5", [1, 2, 5])])
    {
        assert(tc.fieldList.parseFieldList!int.equal(tc.expected));
    }

    /* Convert to zero tests, for both an unsigned and a signed result type. */
    foreach (tc; [Expansion("1", [0]),
                  Expansion("1,2,3", [0, 1, 2]),
                  Expansion("1-2", [0, 1]),
                  Expansion("1-2,6-4", [0, 1, 5, 4, 3]),
                  Expansion("1-2,5", [0, 1, 4])])
    {
        assert(tc.fieldList.parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal(tc.expected));
        assert(tc.fieldList.parseFieldList!(long, Yes.convertToZeroBasedIndex).equal(tc.expected));
    }

    /* Allow zero tests, without conversion to zero-based indices. */
    foreach (tc; [Expansion("0", [0]),
                  Expansion("1,0,3", [1, 0, 3]),
                  Expansion("1-2,5", [1, 2, 5])])
    {
        assert(tc.fieldList
               .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(tc.expected));
        assert(tc.fieldList
               .parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(tc.expected));
    }

    /* Allow zero tests, with conversion to zero-based indices. Zero becomes -1. */
    foreach (tc; [Expansion("0", [-1]),
                  Expansion("1,0,3", [0, -1, 2]),
                  Expansion("1-2,5", [0, 1, 4])])
    {
        assert(tc.fieldList
               .parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(tc.expected));
    }

    /* Error cases. Grouped by the template arguments used. */
    foreach (badList; ["", " ", ",", "5 6", ",7", "8,", "8,9,", "10,,11", "0", "1,0,3"])
    {
        assertThrown(badList.parseFieldList.each);
    }

    foreach (badList; ["", "1,2-3,", "2-,4"])
    {
        assertThrown(badList.parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    }

    foreach (badList; ["1,2,3,,4", ",7", "8,", "10,0,,11"])
    {
        assertThrown(badList.parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    }

    foreach (badList; ["8,9,", "0-2,6-0"])
    {
        assertThrown(badList.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    }

    foreach (badList; ["0", "1,0,3"])
    {
        assertThrown(badList.parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    }

    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}
1639 
/* parseFieldRange parses a single number or number range. E.g. '5' or '5-8'. These are
 * the values in a field-list separated by a comma or other delimiter. It returns a range
 * that iterates over all the values in the range.
 *
 * A number range includes both endpoints and may be given in descending order ('8-5'),
 * in which case the values are produced in descending order.
 *
 * Template parameters:
 *   T             - Integral type used for the field numbers.
 *   convertToZero - With Yes.convertToZeroBasedIndex, one is subtracted from each value.
 *   allowZero     - With Yes.allowFieldNumZero, a lone '0' is accepted. Zero is never
 *                   accepted as either endpoint of a number range ('0-3', '3-0').
 *
 * An exception is thrown when:
 *   - The string is empty.
 *   - A number range is missing an endpoint ('5-', '-5').
 *   - An endpoint cannot be converted to the integral type, which includes values too
 *     large for the type.
 *   - Zero is used as either endpoint of a number range (allowZero only).
 *   - A value is below the minimum: zero with allowZero, one otherwise.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    /* Split on the first dash: [0] is the text before it, [1] the dash itself (empty
     * when there is no dash), [2] the text after it.
     */
    auto rangeSplit = findSplit(fieldRange, "-");
    immutable bool isRange = !rangeSplit[1].empty;

    if (isRange && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = isRange ? rangeSplit[2].to!S : start;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero is only valid as a standalone field number. Both endpoints are checked
         * so that a descending range such as '6-0' is rejected along with '0-6'.
         */
        if (isRange && (start == 0 || last == 0))
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }

        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* iota excludes its end value, so step one increment past 'last' to include it. */
    return iota(start, last + increment, increment);
}
1709 
unittest // parseFieldRange
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A single field-list entry paired with the values it is expected to expand to. */
    static struct Expansion
    {
        string fieldRange;
        int[] expected;
    }

    int[] tenDownToOne = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
    int[] nineDownToZero = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0];

    /* Basic cases */
    foreach (tc; [Expansion("1", [1]),
                  Expansion("2", [2]),
                  Expansion("3-4", [3, 4]),
                  Expansion("3-5", [3, 4, 5]),
                  Expansion("4-3", [4, 3]),
                  Expansion("10-1", tenDownToOne)])
    {
        assert(parseFieldRange(tc.fieldRange).equal(tc.expected));
    }

    /* Convert to zero-based indices */
    foreach (tc; [Expansion("1", [0]),
                  Expansion("2", [1]),
                  Expansion("3-4", [2, 3]),
                  Expansion("3-5", [2, 3, 4]),
                  Expansion("4-3", [3, 2]),
                  Expansion("10-1", nineDownToZero)])
    {
        assert(tc.fieldRange.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal(tc.expected));
    }

    /* Allow zero. */
    foreach (tc; [Expansion("0", [0]),
                  Expansion("1", [1]),
                  Expansion("3-4", [3, 4]),
                  Expansion("10-1", tenDownToOne)])
    {
        assert(tc.fieldRange
               .parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(tc.expected));
    }

    /* Allow zero, convert to zero-based index. */
    foreach (tc; [Expansion("0", [-1]),
                  Expansion("1", [0]),
                  Expansion("3-4", [2, 3]),
                  Expansion("10-1", nineDownToZero)])
    {
        assert(tc.fieldRange
               .parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(tc.expected));
    }

    /* Alternate integer types. */
    foreach (tc; [Expansion("2", [2]),
                  Expansion("3-5", [3, 4, 5]),
                  Expansion("10-1", tenDownToOne)])
    {
        assert(tc.fieldRange.parseFieldRange!uint.equal(tc.expected));
        assert(tc.fieldRange.parseFieldRange!int.equal(tc.expected));
        assert(tc.fieldRange.parseFieldRange!ushort.equal(tc.expected));
        assert(tc.fieldRange.parseFieldRange!short.equal(tc.expected));
    }

    /* Field number zero with alternate integer types. */
    assert("0".parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([0]));
    assert("0".parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));
    assert("0".parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).equal([-1]));

    /* Max field value cases. */
    assert("65535".parseFieldRange!ushort.equal([65535]));   // ushort max
    assert("65533-65535".parseFieldRange!ushort.equal([65533, 65534, 65535]));
    assert("32767".parseFieldRange!short.equal([32767]));    // short max
    assert("32765-32767".parseFieldRange!short.equal([32765, 32766, 32767]));
    assert("32767".parseFieldRange!(short, Yes.convertToZeroBasedIndex).equal([32766]));

    /* Error cases. Grouped by the template arguments used. */
    foreach (badEntry; ["", " ", "-", " -", "- ", "1-", "-2", "-1", "1.0", "0", "0-3",
                        "-2-4", "2--4", "2-", "a", "0x3", "3U", "1_000", "."])
    {
        assertThrown(badEntry.parseFieldRange);
    }

    foreach (badEntry; ["", " ", "-", "1-", "-2", "-1", "0", "0-3", "-2-4", "2--4"])
    {
        assertThrown(badEntry.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    }

    foreach (badEntry; ["", " ", "-", "1-", "-2", "-1", "0-3", "-2-4"])
    {
        assertThrown(badEntry.parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assertThrown(badEntry.parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    }

    /* Value out of range cases. */
    foreach (badEntry; ["65536", "65535-65536"])    // One more than ushort max.
    {
        assertThrown(badEntry.parseFieldRange!ushort);
    }

    foreach (badEntry; ["32768", "32765-32768"])    // One more than short max.
    {
        assertThrown(badEntry.parseFieldRange!short);
    }

    // Convert to zero limits signed range.
    assertThrown("32768".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex));
    assert("32767".parseFieldRange!(ushort, Yes.convertToZeroBasedIndex).equal([32766]));
}
1831 
/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
 *  A Yes value indicates the Unix newline was already removed, as might be done via
 *  std.stdio.File.byLine or similar mechanism.
 */
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsNewlineOnUnix throws an exception if a Windows/DOS line ending is
found on a build compiled for a Unix platform. This is used by the TSV Utilities
to detect Windows/DOS line endings and terminate processing with an error message
to the user.

With `Yes.newlineWasRemoved` (the default) the line is checked for a trailing
carriage return. With `No.newlineWasRemoved` the line is checked for a trailing
carriage return followed by a newline.

The exception message includes `filename` and `lineNum`. A `filename` of "-" is
reported as "Standard Input".

The check is compiled only for Posix builds. On other platforms the function
does nothing.
 */
void throwIfWindowsNewlineOnUnix
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    version(Posix)
    {
        import std.format;

        /* The trailing characters that identify a Windows/DOS line ending. */
        static if (nlWasRemoved) enum string windowsEnding = "\r";
        else enum string windowsEnding = "\r\n";

        /* Nothing to report unless the line is long enough and ends with it. */
        if (line.length < windowsEnding.length) return;
        if (line[$ - windowsEnding.length .. $] != windowsEnding) return;

        const(char)[] displayName = (filename == "-") ? "Standard Input" : filename;

        throw new Exception(
            format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
                   displayName, lineNum));
    }
}
1871 
unittest
{
    /* Note: Currently only building on Posix. Need to add non-Posix test cases
     * if Windows builds are ever done.
     */
    version(Posix)
    {
        import std.algorithm : endsWith;
        import std.exception;

        /* Each content string is checked with the newline already removed and with
         * the newline still present, both with and without a carriage return.
         */
        foreach (i, content; ["", "a", "ab", "abc"])
        {
            immutable size_t lineNum = i + 1;

            assertNotThrown(throwIfWindowsNewlineOnUnix(content, "afile.tsv", lineNum));
            assertThrown(throwIfWindowsNewlineOnUnix(content ~ "\r", "afile.tsv", lineNum));

            assertNotThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(content ~ "\n", "afile.tsv", lineNum));
            assertThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(content ~ "\r\n", "afile.tsv", lineNum + 4));
        }

        /* Standard Input formatting. */
        auto removedCase = collectException(throwIfWindowsNewlineOnUnix("\r", "-", 99));
        assert(removedCase !is null);
        assert(removedCase.msg.endsWith("File: Standard Input, Line: 99"));

        auto presentCase = collectException(
            throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99));
        assert(presentCase !is null);
        assert(presentCase.msg.endsWith("File: Standard Input, Line: 99"));
    }
}