tsv_utils.common.utils source code

1 /**
2 Utilities used by tsv-utils applications. InputFieldReordering, BufferedOutputRange,
3 and several others.
4 
5 Utilities in this file:
6 $(LIST
7     * [InputFieldReordering] - A class that creates a reordered subset of fields from
8       an input line. Fields in the subset are accessed by array indices. This is
9       especially useful when processing the subset in a specific order, such as the
10       order listed on the command-line at run-time.
11 
12     * [BufferedOutputRange] - An OutputRange with an internal buffer used to buffer
13       output. Intended for use with stdout, it is a significant performance benefit.
14 
15     * [bufferedByLine] - An input range that reads from a File handle line by line.
16       It is similar to the standard library method std.stdio.File.byLine, but quite a
17       bit faster. This is achieved by reading in larger blocks and buffering.
18 
19     * [joinAppend] - A function that performs a join, but appending the join output to
20       an output stream. It is a performance improvement over using join or joiner with
21       writeln.
22 
23     * [getTsvFieldValue] - A convenience function when only a single value is needed from
24       an input line.
25 
26     * Field-lists: [parseFieldList], [makeFieldListOptionHandler] - Helper functions for
27       parsing field-lists entered on the command line.
28 
29     * [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detect
30       Windows newlines in input.
31 )
32 
33 Copyright (c) 2015-2020, eBay Inc.
34 Initially written by Jon Degenhardt
35 
36 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
37 */
38 
39 module tsv_utils.common.utils;
40 
41 import std.range;
42 import std.traits : isIntegral, isSomeChar, isSomeString, isUnsigned;
43 import std.typecons : Flag, No, Yes;
44 
// InputFieldReordering class.

/** Template flag selecting whether InputFieldReordering supports partially filled lines. */
alias EnablePartialLines = Flag!("enablePartialLines");

/**
InputFieldReordering - Collect selected fields of an input line into an output array,
arranged in a caller-specified order.

An InputFieldReordering object is created once, before input processing starts. It holds
a mapping from input field index to output position, plus a buffer that receives the
reordered fields. For every input line the caller invokes initNewLine, splits the line
into fields, and hands each field to processNextField in the order the fields occur on
the line. Once allFieldsFilled returns true the output buffer is complete.

Field data is never copied; the output buffer refers to the slices passed in by the
caller. The output buffer must therefore be used or copied while those slices are still
valid, which is normally until the next input line is read. The program below shows the
basic use case: it reads stdin and writes fields [3, 0, 2], in that order. (joinAppend is
a faster alternative to the join call used here.)

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndices = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndices);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            writeln(fieldReordering.outputFields.join('\t'));
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---

Field indices are zero-based and a field may be listed more than once. The outputFields
array is only complete after every requested field has been seen, which is what
allFieldsFilled reports. When a line is too short the outputFields buffer cannot be
relied on. Most TSV applications treat such a line as invalid, so this is acceptable.
When partial lines are acceptable, instantiate the template with EnablePartialLines.yes;
initNewLine then resets every output slot to an empty slice, so slots that were not
filled read as empty strings.
*/
final class InputFieldReordering(C, EnablePartialLines partialLinesOk = EnablePartialLines.no)
if (isSomeChar!C)
{
    /* Implementation: The constructor builds 'fromToMap', an array of (from, to) tuples
     * pairing an input field index with a slot in the output buffer. The array is sorted,
     * so it is ordered by input field index, which lets a single pass over the input
     * fields fill the output buffer. An example:
     *
     *    inputFieldIndicies: [3, 0, 7, 7, 1, 0, 9]
     *             fromToMap: [<0,1>, <0,5>, <1,4>, <3,0>, <7,2>, <7,3>, <9,6>]
     *
     * While a line is being processed, 'mapStack' is a slice of fromToMap holding the
     * entries that have not been consumed yet.
     */
    import std.range;
    import std.typecons : Tuple;

    alias TupleFromTo = Tuple!(size_t, "from", size_t, "to");

    private C[][] outputFieldsBuf;
    private TupleFromTo[] fromToMap;
    private TupleFromTo[] mapStack;

    /** Builds the input-to-output mapping.
     *
     * Params:
     *   inputFieldIndicies = Zero-based input field indices, listed in output order.
     *   start = First output position number; positions are assigned by enumerate(start).
     *
     * NOTE(review): the output buffer has inputFieldIndicies.length slots, so a non-zero
     * start produces positions past the end of the buffer - confirm the intended use
     * before passing anything other than the default.
     */
    final this(const ref size_t[] inputFieldIndicies, size_t start = 0) pure nothrow @safe
    {
        import std.algorithm : sort;

        immutable numRequested = inputFieldIndicies.length;
        outputFieldsBuf = new C[][](numRequested);
        fromToMap.reserve(numRequested);

        foreach (entry; inputFieldIndicies.enumerate(start))
        {
            /* entry.index is the output position, entry.value the input field index. */
            fromToMap ~= TupleFromTo(entry.value, entry.index);
        }

        sort(fromToMap);
        initNewLine;
    }

    /** initNewLine prepares the object for the next input line. */
    final void initNewLine() pure nothrow @safe
    {
        static if (partialLinesOk)
        {
            /* Empty every slot so unfilled fields do not show data from the prior line. */
            foreach (ref slot; outputFieldsBuf) slot.length = 0;
        }
        mapStack = fromToMap;
    }

    /** processNextField stores an input field in every output slot that requests it.
     * Call it once per field on the line, in the order the fields occur.
     *
     * Returns: The number of output slots filled by this call.
     */
    final size_t processNextField(size_t fieldIndex, C[] fieldValue) pure nothrow @safe @nogc
    {
        size_t filledCount = 0;
        for (; mapStack.length != 0 && mapStack[0].from == fieldIndex; mapStack = mapStack[1 .. $])
        {
            outputFieldsBuf[mapStack[0].to] = fieldValue;
            ++filledCount;
        }
        return filledCount;
    }

    /** allFieldsFilled returns true once every requested field has been processed. */
    final bool allFieldsFilled() const pure nothrow @safe @nogc
    {
        return mapStack.length == 0;
    }

    /** outputFields returns the assembled output fields. Unless partial lines are
     * enabled, the result is only valid after allFieldsFilled is true.
     */
    final C[][] outputFields() pure nothrow @safe @nogc
    {
        return outputFieldsBuf;
    }
}
182 
/* InputFieldReordering with each supported character type (char, wchar, dchar). */
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                       ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                       ["r3f0", "123",  "456",    "789"]];

    size_t[] fields_2_0 = [2, 0];

    auto expected_2_0 = [["r1f2",   "r1f0"],
                         ["ÀBCßßZ", "r2f0"],
                         ["456",    "r3f0"]];

    /* Conversion of the expected values to dstring form; not compared against below. */
    dstring[][] dstringExpected_2_0 = to!(dstring[][])(expected_2_0);

    /* Runs every input line through a reordering object instantiated for character
     * type C and checks both the fill state after each field and the final output.
     */
    void checkCharType(C)()
    {
        C[][][] expected = to!(C[][][])(expected_2_0);
        auto reordering = new InputFieldReordering!C(fields_2_0);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(C[])(fieldValue));

                /* Fields 0 and 2 are requested, so the output is complete from index 2 on. */
                assert ((fieldIndex >= 2) == reordering.allFieldsFilled);
            }

            assert(reordering.allFieldsFilled);
            assert(reordering.outputFields == expected[lineIndex]);
        }
    }

    checkCharType!char();
    checkCharType!wchar();
    checkCharType!dchar();
}
232 
/* Partial line support: the output buffer is inspected after every single field. */
@safe unittest
{
    import std.conv : to;

    size_t[] requestedFields = [2, 0];

    auto rows = [["r1f0", "r1f1", "r1f2",   "r1f3"],
                 ["r2f0", "abc",  "ÀBCßßZ", "ghi"],
                 ["r3f0", "123",  "456",    "789"]];

    /* Expected output buffer contents, indexed by [row][field just processed]. The slot
     * for field 2 stays empty until that field has been seen.
     */
    char[][][][] snapshots =
        [
            [["", "r1f0"], ["", "r1f0"], ["r1f2", "r1f0"],   ["r1f2", "r1f0"]],
            [["", "r2f0"], ["", "r2f0"], ["ÀBCßßZ", "r2f0"], ["ÀBCßßZ", "r2f0"]],
            [["", "r3f0"], ["", "r3f0"], ["456", "r3f0"],    ["456", "r3f0"]],
        ].to!(char[][][][]);

    auto reordering = new InputFieldReordering!(char, EnablePartialLines.yes)(requestedFields);

    foreach (rowIndex, row; rows)
    {
        reordering.initNewLine;
        foreach (columnIndex, cell; row)
        {
            reordering.processNextField(columnIndex, cell.to!(char[]));
            assert(reordering.outputFields == snapshots[rowIndex][columnIndex]);
        }
    }
}
266 
/* Field combination tests, table driven: each field list is paired with the output
 * expected for every input line.
 */
@safe unittest
{
    import std.conv : to;

    auto inputLines = [["00", "01", "02", "03"],
                       ["10", "11", "12", "13"],
                       ["20", "21", "22", "23"]];

    /* Requested field lists. Entry i pairs with expectedSets[i]. */
    size_t[][] fieldSets =
        [
            [0],
            [3],
            [0, 1],
            [1, 0],
            [0, 3],
            [3, 0],
            [0, 1, 2, 3],
            [3, 2, 1, 0],
            [0, 3, 0, 0, 1],
        ];

    /* Expected output, indexed by [field set][input line]. */
    auto expectedSets = to!(char[][][][])(
        [
            [["00"],
             ["10"],
             ["20"]],

            [["03"],
             ["13"],
             ["23"]],

            [["00", "01"],
             ["10", "11"],
             ["20", "21"]],

            [["01", "00"],
             ["11", "10"],
             ["21", "20"]],

            [["00", "03"],
             ["10", "13"],
             ["20", "23"]],

            [["03", "00"],
             ["13", "10"],
             ["23", "20"]],

            [["00", "01", "02", "03"],
             ["10", "11", "12", "13"],
             ["20", "21", "22", "23"]],

            [["03", "02", "01", "00"],
             ["13", "12", "11", "10"],
             ["23", "22", "21", "20"]],

            [["00", "03", "00", "00", "01"],
             ["10", "13", "10", "10", "11"],
             ["20", "23", "20", "20", "21"]],
        ]);

    assert(fieldSets.length == expectedSets.length);

    foreach (setIndex, ref fields; fieldSets)
    {
        auto reordering = new InputFieldReordering!char(fields);

        foreach (lineIndex, line; inputLines)
        {
            reordering.initNewLine;

            foreach (fieldIndex, fieldValue; line)
            {
                reordering.processNextField(fieldIndex, to!(char[])(fieldValue));
            }

            assert(reordering.outputFields == expectedSets[setIndex][lineIndex]);
        }
    }
}
369 
370 
371 import std.stdio : File, isFileHandle, KeepTerminator;
372 import std.range : isOutputRange;
373 import std.traits : Unqual;
374 
/**
BufferedOutputRange is a performance enhancement over writing directly to an output
stream. It holds a File open for write or an OutputRange. Output is accumulated in an
internal buffer and written to the output stream as a block.

Writing to stdout is a key use case. BufferedOutputRange is often dramatically faster
than writing to stdout directly. This is especially noticeable for outputs with short
lines, as it blocks many writes together in a single write.

The internal buffer is written to the output stream after flushSize has been reached.
This is checked at newline boundaries, when appendln is called or when put is called
with a single newline character. Other writes check maxSize, which is used to avoid
runaway buffers.

BufferedOutputRange has a put method allowing it to be used as a range. It has a number
of other methods providing additional control.

$(LIST
    * `this(outputStream [, flushSize, reserveSize, maxSize])` - Constructor. Takes the
      output stream, e.g. stdout. Other arguments are optional, defaults normally suffice.

    * `append(stuff)` - Append to the internal buffer.

    * `appendln(stuff)` - Append to the internal buffer, followed by a newline. The buffer
      is flushed to the output stream if it has reached flushSize.

    * `appendln()` - Append a newline to the internal buffer. The buffer is flushed to the
      output stream if it has reached flushSize.

    * `joinAppend(inputRange, delim)` - An optimization of `append(inputRange.joiner(delim))`.
      For reasons that are not clear, joiner is quite slow.

    * `flushIfFull()` - Flush the internal buffer to the output stream if flushSize has been
      reached.

    * `flush()` - Write the internal buffer to the output stream.

    * `put(stuff)` - Appends to the internal buffer. Acts as `appendln()` if passed a single
      newline character, '\n' or "\n".
)

The internal buffer is automatically flushed when the BufferedOutputRange goes out of
scope.
*/
struct BufferedOutputRange(OutputTarget)
if (isFileHandle!(Unqual!OutputTarget) || isOutputRange!(Unqual!OutputTarget, char))
{
    import std.range : isOutputRange;
    import std.array : appender;

    /* Identify the output element type. Only supporting char and ubyte for now. */
    static if (isFileHandle!OutputTarget || isOutputRange!(OutputTarget, char))
    {
        alias C = char;
    }
    else static if (isOutputRange!(OutputTarget, ubyte))
    {
        alias C = ubyte;
    }
    else static assert(false);

    private enum defaultReserveSize = 11264;
    private enum defaultFlushSize = 10240;
    private enum defaultMaxSize = 4194304;

    private OutputTarget _outputTarget;
    private auto _outputBuffer = appender!(C[]);
    private immutable size_t _flushSize;
    private immutable size_t _maxSize;

    /** Creates a buffered writer for outputTarget.
     *
     * Params:
     *   outputTarget = The File or output range receiving the flushed data.
     *   flushSize = Buffer length at which newline-boundary checks trigger a flush.
     *   reserveSize = Capacity reserved in the internal buffer up front.
     *   maxSize = Buffer length at which a flush happens regardless of newlines.
     *             Expected to be at least flushSize.
     */
    this(OutputTarget outputTarget,
         size_t flushSize = defaultFlushSize,
         size_t reserveSize = defaultReserveSize,
         size_t maxSize = defaultMaxSize)
    @safe
    {
        assert(flushSize <= maxSize);

        _outputTarget = outputTarget;
        _flushSize = flushSize;

        /* When the assert above is compiled out, keep maxSize from being below flushSize. */
        _maxSize = (flushSize <= maxSize) ? maxSize : flushSize;
        _outputBuffer.reserve(reserveSize);
    }

    /** Flushes any buffered data when the object is destroyed. */
    ~this() @safe
    {
        flush();
    }

    /** Writes the internal buffer to the output target, then empties the buffer. */
    void flush() @safe
    {
        static if (isFileHandle!OutputTarget) _outputTarget.write(_outputBuffer.data);
        else _outputTarget.put(_outputBuffer.data);

        _outputBuffer.clear;
    }

    /** Flushes if the buffer length has reached flushSize.
     *
     * Returns: true if a flush was performed.
     */
    bool flushIfFull() @safe
    {
        bool isFull = _outputBuffer.data.length >= _flushSize;
        if (isFull) flush();
        return isFull;
    }

    /* flushIfMaxSize is a safety check to avoid runaway buffer growth. */
    void flushIfMaxSize() @safe
    {
        if (_outputBuffer.data.length >= _maxSize) flush();
    }

    /* maybeFlush is intended for the case where put is called with a trailing newline.
     *
     * Flushing occurs if the buffer has a trailing newline and has reached flush size.
     * Flushing also occurs if the buffer has reached max size.
     *
     * An empty buffer is never flushed. The emptiness check must come first: with a
     * flushSize of zero the size test passes for an empty buffer, and looking at the
     * last character of an empty buffer is a range violation.
     *
     * Returns: true if a flush was performed.
     */
    private bool maybeFlush() @safe
    {
        immutable size_t bufferLength = _outputBuffer.data.length;

        immutable bool doFlush =
            bufferLength > 0 &&
            bufferLength >= _flushSize &&
            (_outputBuffer.data[bufferLength - 1] == '\n' || bufferLength >= _maxSize);

        if (doFlush) flush();
        return doFlush;
    }


    /* appendRaw adds to the internal buffer without performing any flush check. */
    private void appendRaw(T)(T stuff) pure @safe
    {
        import std.range : rangePut = put;
        rangePut(_outputBuffer, stuff);
    }

    /** Appends to the internal buffer, then flushes if the buffer now ends in a newline
     * and has reached flushSize, or if it has reached maxSize.
     */
    void append(T)(T stuff) @safe
    {
        appendRaw(stuff);
        maybeFlush();
    }

    /** Appends a newline, then flushes if flushSize has been reached.
     *
     * Returns: true if a flush was performed.
     */
    bool appendln() @safe
    {
        appendRaw('\n');
        return flushIfFull();
    }

    /** Appends stuff followed by a newline, then flushes if flushSize has been reached.
     *
     * Returns: true if a flush was performed.
     */
    bool appendln(T)(T stuff)
    {
        appendRaw(stuff);
        return appendln();
    }

    /* joinAppend is an optimization of append(inputRange.joiner(delimiter)).
     * This form is quite a bit faster, 40%+ on some benchmarks.
     *
     * Only the maxSize check is made after appending.
     */
    void joinAppend(InputRange, E)(InputRange inputRange, E delimiter)
    if (isInputRange!InputRange &&
        is(ElementType!InputRange : const C[]) &&
        (is(E : const C[]) || is(E : const C)))
    {
        /* The first element is written without a leading delimiter. */
        if (!inputRange.empty)
        {
            appendRaw(inputRange.front);
            inputRange.popFront;
        }
        foreach (x; inputRange)
        {
            appendRaw(delimiter);
            appendRaw(x);
        }
        flushIfMaxSize();
    }

    /* Make this an output range.
     *
     * A single newline, as a character or as a string, behaves as appendln(). Any other
     * single character is buffered without a flush check. Everything else goes through
     * append().
     */
    void put(T)(T stuff)
    {
        import std.traits : isSomeChar, isSomeString;

        static if (isSomeChar!T)
        {
            if (stuff == '\n') appendln();
            else appendRaw(stuff);
        }
        else static if (isSomeString!T)
        {
            if (stuff == "\n") appendln();
            else append(stuff);
        }
        else append(stuff);
    }
}
567 
/* BufferedOutputRange tests: File, locking text writer and Appender targets, flush
 * boundaries, joinAppend, use as an output range, and the maxSize limit.
 */
unittest
{
    import tsv_utils.common.unittest_utils;
    import std.algorithm : joiner, map;
    import std.array : appender;
    import std.conv : to;
    import std.file : readText, rmdirRecurse;
    import std.path : buildPath;
    import std.range : iota;
    import std.stdio : File;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_output");
    scope(exit) testDir.rmdirRecurse;

    /* What writeSample produces after its label. */
    enum sampleText = "abcdefghijkl100\n0 1 2 3 4 5 6 7 8 9\n";

    /* Writes the label followed by the same mix of strings, an array of strings and a
     * joiner range to the stream. The stream is taken by ref, so no copy is made and
     * nothing is flushed on return.
     */
    void writeSample(Stream)(ref Stream stream, string label)
    {
        stream.append(label);
        stream.append("abc");
        stream.append(["def", "ghi", "jkl"]);
        stream.appendln("100");
        stream.append(iota(0, 10).map!(n => n.to!string).joiner(" "));
        stream.appendln();
    }

    /* File target with default sizes. Leaving the inner scope triggers the flush. */
    string filepath1 = buildPath(testDir, "file1.txt");
    {
        auto stream = BufferedOutputRange!File(File(filepath1, "w"));
        writeSample(stream, "file1: ");
    }
    assert(readText(filepath1) == "file1: " ~ sampleText);

    /* File target with a flush size and a reserve size of zero. */
    string filepath2 = buildPath(testDir, "file2.txt");
    {
        auto stream = BufferedOutputRange!File(File(filepath2, "w"), 0, 0);
        writeSample(stream, "file2: ");
    }
    assert(readText(filepath2) == "file2: " ~ sampleText);

    /* Locking text writer target. Requires version 2.078.0
       See: https://issues.dlang.org/show_bug.cgi?id=9661
     */
    static if (__VERSION__ >= 2078)
    {
        string filepath3 = buildPath(testDir, "file3.txt");
        {
            auto ltw = File(filepath3, "w").lockingTextWriter;
            {
                auto stream = BufferedOutputRange!(typeof(ltw))(ltw);
                writeSample(stream, "file3: ");
            }
        }
        assert(readText(filepath3) == "file3: " ~ sampleText);
    }

    /* Appender target. */
    auto app1 = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app1))(app1);
        writeSample(stream, "appender1: ");
    }
    assert(app1.data == "appender1: " ~ sampleText);

    /* Appender target, checking where flushes happen. */
    auto app2 = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app2))(app2, 10, 0); // Flush at 10+ chars.
        bool didFlush = false;

        assert(app2.data == "");

        stream.append("12345678");      // Eight chars, nothing written yet.
        assert(app2.data == "");

        didFlush = stream.appendln;     // Ninth char, still below the flush size.
        assert(!didFlush);
        assert(app2.data == "");

        didFlush = stream.appendln;     // Tenth char reaches the flush size.
        assert(didFlush);
        assert(app2.data == "12345678\n\n");

        app2.clear;
        assert(app2.data == "");

        stream.append("12345678");

        didFlush = stream.flushIfFull;  // Eight chars, below the flush size.
        assert(!didFlush);
        assert(app2.data == "");

        stream.flush;                   // An explicit flush writes regardless of size.
        assert(app2.data == "12345678");

        app2.clear;
        assert(app2.data == "");

        stream.append("123456789012345");   // Past the flush size, but no newline.
        assert(app2.data == "");
    }
    assert(app2.data == "123456789012345");

    /* joinAppend with character and string delimiters. */
    auto app1b = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app1b))(app1b);
        stream.append("appenderB: ");
        stream.joinAppend(["a", "bc", "def"], '-');
        stream.append(':');
        stream.joinAppend(["g", "hi", "jkl"], '-');
        stream.appendln("*100*");
        stream.joinAppend(iota(0, 6).map!(n => n.to!string), ' ');
        stream.append(' ');
        stream.joinAppend(iota(6, 10).map!(n => n.to!string), " ");
        stream.appendln();
    }
    assert(app1b.data == "appenderB: a-bc-def:g-hi-jkl*100*\n0 1 2 3 4 5 6 7 8 9\n",
           "app1b.data: |" ~app1b.data ~ "|");

    /* Use as an output range. A range passed by ref is not flushed when the function
     * returns; a range passed by value is flushed when the function returns. Both the
     * UFCS and the non-UFCS call styles are exercised.
     */

    void emitByRef(T)(ref T range)
    if (isOutputRange!(T, char))
    {
        range.put('1');
        put(range, "23");
        range.put('\n');
        range.put(["5", "67"]);
        put(range, iota(8, 10).map!(n => n.to!string));
        put(range, "\n");
    }

    void emitByValue(T)(T range)
    if (isOutputRange!(T, char))
    {
        put(range, '1');
        range.put("23");
        put(range, '\n');
        put(range, ["5", "67"]);
        range.put(iota(8, 10).map!(n => n.to!string));
        range.put("\n");
    }

    auto app3 = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app3))(app3, 12, 0);
        emitByRef(stream);
        assert(app3.data == "", "app3.data: |" ~app3.data ~ "|");
        emitByRef(stream);
        assert(app3.data == "123\n56789\n123\n", "app3.data: |" ~app3.data ~ "|");
    }
    assert(app3.data == "123\n56789\n123\n56789\n", "app3.data: |" ~app3.data ~ "|");

    auto app4 = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app4))(app4, 12, 0);
        emitByValue(stream);
        assert(app4.data == "123\n56789\n", "app4.data: |" ~app4.data ~ "|");
        emitByValue(stream);
        assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");
    }
    assert(app4.data == "123\n56789\n123\n56789\n", "app4.data: |" ~app4.data ~ "|");

    /* The maxSize limit. */
    auto app5 = appender!(char[]);
    {
        auto stream = BufferedOutputRange!(typeof(app5))(app5, 5, 0, 10); // maxSize 10
        assert(app5.data == "");

        stream.append("1234567");   // No newline and below maxSize: stays buffered.
        assert(app5.data == "");

        stream.append("89012");     // Twelve chars buffered: maxSize forces a flush.
        assert(app5.data == "123456789012");

        stream.put("1234567");      // Same sequence through put.
        assert(app5.data == "123456789012");

        stream.put("89012");        // Flushed by maxSize.
        assert(app5.data == "123456789012123456789012");

        stream.joinAppend(["ab", "cd"], '-');        // Five chars, not flushed.
        stream.joinAppend(["de", "gh", "ij"], '-');  // Thirteen chars: flushed by maxSize.
        assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
    }
    assert(app5.data == "123456789012123456789012ab-cdde-gh-ij");
}
775 
776 /**
777 bufferedByLine is a performance enhancement over std.stdio.File.byLine. It works by
778 reading a large buffer from the input stream rather than just a single line.
779 
780 The file argument needs to be a File object open for reading, typically a filesystem
781 file or standard input. Use the Yes.keepTerminator template parameter to keep the
782 newline. This is similar to stdio.File.byLine, except specified as a template parameter
783 rather than a runtime parameter.
784 
785 Reading in blocks does mean that input is not read until a full buffer is available or
786 end-of-file is reached. For this reason, bufferedByLine is not appropriate for
787 interactive input.
788 */
789 
790 auto bufferedByLine(KeepTerminator keepTerminator = No.keepTerminator, Char = char,
791                     ubyte terminator = '\n', size_t readSize = 1024 * 128, size_t growSize = 1024 * 16)
792     (File file)
793 if (is(Char == char) || is(Char == ubyte))
794 {
795     static assert(0 < growSize && growSize <= readSize);
796 
797     static final class BufferedByLineImpl
798     {
799         /* Buffer state variables
800          *   - _buffer.length - Full length of allocated buffer.
801          *   - _dataEnd - End of currently valid data (end of last read).
802          *   - _lineStart - Start of current line.
803          *   - _lineEnd - End of current line.
804          */
805         private File _file;
806         private ubyte[] _buffer;
807         private size_t _lineStart = 0;
808         private size_t _lineEnd = 0;
809         private size_t _dataEnd = 0;
810 
811         this (File f) @safe
812         {
813             _file = f;
814             _buffer = new ubyte[readSize + growSize];
815         }
816 
817         bool empty() const pure @safe
818         {
819             return _file.eof && _lineStart == _dataEnd;
820         }
821 
822         Char[] front()  pure @safe
823         {
824             assert(!empty, "Attempt to take the front of an empty bufferedByLine.");
825 
826             static if (keepTerminator == Yes.keepTerminator)
827             {
828                 return cast(Char[]) _buffer[_lineStart .. _lineEnd];
829             }
830             else
831             {
832                 assert(_lineStart < _lineEnd);
833                 immutable end = (_buffer[_lineEnd - 1] == terminator) ? _lineEnd - 1 : _lineEnd;
834                 return cast(Char[]) _buffer[_lineStart .. end];
835             }
836         }
837 
838         /* Note: Call popFront at initialization to do the initial read. */
839         void popFront() @safe
840         {
841             import std.algorithm: copy, find;
842             assert(!empty, "Attempt to popFront an empty bufferedByLine.");
843 
844             /* Pop the current line. */
845             _lineStart = _lineEnd;
846 
847             /* Set up the next line if more data is available, either in the buffer or
848              * the file. The next line ends at the next newline, if there is one.
849              *
850              * Notes:
851              * - 'find' returns the slice starting with the character searched for, or
852              *   an empty range if not found.
853              * - _lineEnd is set to _dataEnd both when the current buffer does not have
854              *   a newline and when it ends with one.
855              */
856             auto found = _buffer[_lineStart .. _dataEnd].find(terminator);
857             _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
858 
859             if (found.empty && !_file.eof)
860             {
861                 /* No newline in current buffer. Read from the file until the next
862                  * newline is found.
863                  */
864                 assert(_lineEnd == _dataEnd);
865 
866                 if (_lineStart > 0)
867                 {
868                     /* Move remaining data to the start of the buffer. */
869                     immutable remainingLength = _dataEnd - _lineStart;
870                     copy(_buffer[_lineStart .. _dataEnd], _buffer[0 .. remainingLength]);
871                     _lineStart = 0;
872                     _lineEnd = _dataEnd = remainingLength;
873                 }
874 
875                 do
876                 {
877                     /* Grow the buffer if necessary. */
878                     immutable availableSize = _buffer.length - _dataEnd;
879                     if (availableSize < readSize)
880                     {
881                         size_t growBy = growSize;
882                         while (availableSize + growBy < readSize) growBy += growSize;
883                         _buffer.length += growBy;
884                     }
885 
886                     /* Read the next block. */
887                     _dataEnd +=
888                         _file.rawRead(_buffer[_dataEnd .. _dataEnd + readSize])
889                         .length;
890 
891                     found = _buffer[_lineEnd .. _dataEnd].find(terminator);
892                     _lineEnd = found.empty ? _dataEnd : _dataEnd - found.length + 1;
893 
894                 } while (found.empty && !_file.eof);
895             }
896         }
897     }
898 
899     assert(file.isOpen, "bufferedByLine passed a closed file.");
900 
901     auto r = new BufferedByLineImpl(file);
902     r.popFront;
903     return r;
904 }
905 
/* bufferedByLine tests. Each test reads the same content with bufferedByLine and with
 * std.stdio.File.byLine and requires the two to produce identical lines.
 */
unittest
{
    import std.array : appender;
    import std.conv : to;
    import std.file : rmdirRecurse;
    import std.path : buildPath;
    import std.range : lockstep;
    import std.stdio;
    import tsv_utils.common.unittest_utils;

    auto testDir = makeUnittestTempDir("tsv_utils_buffered_byline");
    scope(exit) testDir.rmdirRecurse;

    /* Create two data files with the same data. Read both in parallel with byLine and
     * bufferedByLine and compare each line.
     */
    auto data1 = appender!(char[])();

    foreach (i; 1 .. 1001) data1.put('\n');
    foreach (i; 1 .. 1001) data1.put("a\n");
    foreach (i; 1 .. 1001) { data1.put(i.to!string); data1.put('\n'); }
    foreach (i; 1 .. 1001)
    {
        foreach (j; 1 .. i+1) data1.put('x');
        data1.put('\n');
    }

    string file1a = buildPath(testDir, "file1a.txt");
    string file1b = buildPath(testDir, "file1b.txt");
    {
        file1a.File("w").write(data1.data);
        file1b.File("w").write(data1.data);
    }

    /* Default parameters. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }
    {
        auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator);
        auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Smaller read size. This will trigger buffer growth. */
    {
        auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', 512, 256);
        auto f1bIn = file1b.File().byLine(No.keepTerminator);
        foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
    }

    /* Exercise boundary cases in buffer growth.
     * Note: static-foreach requires DMD 2.076 / LDC 1.6
     */
    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(No.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            auto f1aIn = file1a.File().bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
            auto f1bIn = file1b.File().byLine(Yes.keepTerminator);
            foreach (a, b; lockstep(f1aIn, f1bIn, StoppingPolicy.requireSameLength)) assert(a == b);
        }}
    }

    /* Files that do not end in a newline. The file1 pair is deliberately overwritten
     * here; the large data set written above is no longer needed.
     */
    string file2a = buildPath(testDir, "file2a.txt");
    string file2b = buildPath(testDir, "file2b.txt");
    string file3a = buildPath(testDir, "file3a.txt");
    string file3b = buildPath(testDir, "file3b.txt");
    {
        file1a.File("w").write("a");
        file1b.File("w").write("a");
        file2a.File("w").write("ab");
        file2b.File("w").write("ab");
        file3a.File("w").write("abc");
        file3b.File("w").write("abc");
    }

    /* Each pair holds identical content: the first file is read with bufferedByLine,
     * the second with byLine.
     */
    string[][] filePairs = [[file1a, file1b], [file2a, file2b], [file3a, file3b]];

    static foreach (readSize; [1, 2, 4])
    {
        static foreach (growSize; 1 .. readSize + 1)
        {{
            foreach (pair; filePairs)
            {
                auto bufferedIn = File(pair[0]).bufferedByLine!(No.keepTerminator, char, '\n', readSize, growSize);
                auto byLineIn = File(pair[1]).byLine(No.keepTerminator);
                foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
            }
        }}
        static foreach (growSize; 1 .. readSize + 1)
        {{
            foreach (pair; filePairs)
            {
                auto bufferedIn = File(pair[0]).bufferedByLine!(Yes.keepTerminator, char, '\n', readSize, growSize);
                auto byLineIn = File(pair[1]).byLine(Yes.keepTerminator);
                foreach (a, b; lockstep(bufferedIn, byLineIn, StoppingPolicy.requireSameLength)) assert(a == b);
            }
        }}
    }
}
1029 
/**
joinAppend performs a join operation on an input range, appending the results to
an output range.

joinAppend was written as a performance enhancement over using std.algorithm.joiner
or std.array.join with writeln. Using joiner with writeln is quite slow, 3-4x slower
than std.array.join with writeln. The joiner performance may be due to interaction
with writeln, this was not investigated. Using joiner with stdout.lockingTextWriter
is better, but still substantially slower than join. Using join works reasonably well,
but is allocating memory unnecessarily.

Using joinAppend with Appender is a bit faster than join, and allocates less memory.
The Appender re-uses the underlying data buffer, saving memory. The example below
illustrates. It is a modification of the InputFieldReordering example. The role
Appender plus joinAppend are playing is to buffer the output. BufferedOutputRange
uses a similar technique to buffer multiple lines.

Note: The original uses of joinAppend have been replaced by BufferedOutputRange, which
has its own joinAppend method. However, joinAppend remains useful when constructing
internal buffers where BufferedOutputRange is not appropriate.

Params:
    inputRange = The elements to join. Each element is either a single E or an array of E.
    outputRange = The output range that receives the joined elements. Passed by reference.
    delimiter = The value written between consecutive elements of inputRange.

Returns: The output range (a copy of the outputRange argument after the writes).

---
int main(string[] args)
{
    import tsv_utils.common.utils;
    import std.algorithm, std.array, std.range, std.stdio;
    size_t[] fieldIndicies = [3, 0, 2];
    auto fieldReordering = new InputFieldReordering!char(fieldIndicies);
    auto outputBuffer = appender!(char[]);
    foreach (line; stdin.byLine)
    {
        fieldReordering.initNewLine;
        foreach(fieldIndex, fieldValue; line.splitter('\t').enumerate)
        {
            fieldReordering.processNextField(fieldIndex, fieldValue);
            if (fieldReordering.allFieldsFilled) break;
        }
        if (fieldReordering.allFieldsFilled)
        {
            outputBuffer.clear;
            writeln(fieldReordering.outputFields.joinAppend(outputBuffer, ('\t')).data);
        }
        else
        {
            writeln("Error: Insufficient number of fields on the line.");
        }
    }
    return 0;
}
---
*/
OutputRange joinAppend(InputRange, OutputRange, E)
    (InputRange inputRange, ref OutputRange outputRange, E delimiter)
if (isInputRange!InputRange &&
    ((is(ElementType!InputRange : const E[]) &&
      isOutputRange!(OutputRange, E[]))
     ||
     (is(ElementType!InputRange : const E) &&
      isOutputRange!(OutputRange, E))
    ))
{
    /* The first element is written without a leading delimiter. */
    if (!inputRange.empty)
    {
        outputRange.put(inputRange.front);
        inputRange.popFront;
    }

    /* Every remaining element is preceded by the delimiter. */
    foreach (x; inputRange)
    {
        outputRange.put(delimiter);
        outputRange.put(x);
    }
    return outputRange;
}
1103 
/* joinAppend tests: arrays of char[], arrays of string, and integer ranges, including
 * checks that the input ranges are not modified by the join.
 */
@safe unittest
{
    import std.array : appender;
    import std.algorithm : equal;

    char[] c1 = ['a', 'b', 'c'];
    char[] c2 = ['d', 'e', 'f'];
    char[] c3 = ['g', 'h', 'i'];
    auto cvec = [c1, c2, c3];

    auto s1 = "abc";
    auto s2 = "def";
    auto s3 = "ghi";
    auto svec = [s1, s2, s3];

    auto charAppender = appender!(char[])();

    /* Joining must leave the caller's input untouched. */
    assert(cvec.joinAppend(charAppender, '_').data == "abc_def_ghi");
    assert(equal(cvec, [c1, c2, c3]));

    /* Output is appended to whatever the appender already holds. */
    charAppender.put('$');
    assert(svec.joinAppend(charAppender, '|').data == "abc_def_ghi$abc|def|ghi");
    assert(equal(svec, [s1, s2, s3]));

    charAppender.clear;
    assert(svec.joinAppend(charAppender, '|').data == "abc|def|ghi");

    auto intAppender = appender!(int[])();

    auto i1 = [100, 101, 102];
    auto i2 = [200, 201, 202];
    auto i3 = [300, 301, 302];
    auto ivec = [i1, i2, i3];

    /* Range of arrays: each array is one joined element. */
    assert(ivec.joinAppend(intAppender, 0).data ==
           [100, 101, 102, 0, 200, 201, 202, 0, 300, 301, 302]);

    /* Range of scalars: the delimiter goes between individual values. No delimiter
     * is written between the output of separate joinAppend calls.
     */
    intAppender.clear;
    assert(i1.joinAppend(intAppender, 0).data ==
           [100, 0, 101, 0, 102]);
    assert(i2.joinAppend(intAppender, 1).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202]);
    assert(i3.joinAppend(intAppender, 2).data ==
           [100, 0, 101, 0, 102,
            200, 1, 201, 1, 202,
            300, 2, 301, 2, 302]);
}
1152 
/**
getTsvFieldValue returns the value of one field of a delimited text line, converted to
the requested type.

It is a convenience function for cases where a single field of an input line is all
that is needed. When several fields are needed it is more efficient to work directly
with std.algorithm.splitter or the InputFieldReordering class.

The line is split on the delimiter character, the field at the zero-based fieldIndex is
located, and its text is converted to type T with std.conv.to.

Params:
    line = The text of the input line.
    fieldIndex = Zero-based index of the field to extract.
    delim = The field delimiter character.

Returns: The field value converted to T.

Throws: std.conv.ConvException if the conversion fails. An Exception if the line has too
few fields. The message in that case uses 1-upped field numbers, as a command line user
would have entered them.
 */
T getTsvFieldValue(T, C)(const C[] line, size_t fieldIndex, C delim)
if (isSomeChar!C)
{
    import std.algorithm : splitter;
    import std.conv : to;
    import std.format : format;
    import std.range;

    auto fields = line.splitter(delim);

    /* Step past the fields preceding the requested one, stopping early if the line
     * runs out. numSkipped ends up as the number of fields actually passed over.
     */
    size_t numSkipped = 0;
    for (; numSkipped < fieldIndex; ++numSkipped)
    {
        if (fields.empty) break;
        fields.popFront;
    }

    /* The normal case: the requested field exists. */
    if (!fields.empty) return fields.front.to!T;

    if (fieldIndex != 0)
    {
        throw new Exception(
            format("Not enough fields on line. Number required: %d; Number found: %d",
                   fieldIndex + 1, numSkipped));
    }

    /* Field zero of an empty line. This works around a splitter special case: an empty
     * input produces an empty range rather than a range holding one empty string, which
     * does not properly represent a single column file. The input line itself is a
     * convenient source of the empty value. Info:
     *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
     *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
     */
    assert(line.empty);
    return line.to!T;
}
1215 
/* getTsvFieldValue tests: common cases, empty fields, the supported character types,
 * conversion errors, and lines with too few fields.
 */
@safe unittest
{
    import std.conv : ConvException, to;
    import std.exception;

    /* Common cases. */
    assert(getTsvFieldValue!double("123", 0, '\t') == 123.0);
    assert(getTsvFieldValue!double("-10.5", 0, '\t') == -10.5);
    assert(getTsvFieldValue!size_t("abc|123", 1, '|') == 123);
    assert(getTsvFieldValue!int("紅\t红\t99", 2, '\t') == 99);
    assert(getTsvFieldValue!int("99\t紅\t红", 0, '\t') == 99);
    assert(getTsvFieldValue!string("紅\t红\t99", 2, '\t') == "99");
    assert(getTsvFieldValue!string("紅\t红\t99", 1, '\t') == "红");
    assert(getTsvFieldValue!string("紅\t红\t99", 0, '\t') == "紅");
    assert(getTsvFieldValue!string("红色和绿色\tred and green\t赤と緑\t10.5", 2, '\t') == "赤と緑");
    assert(getTsvFieldValue!double("红色和绿色\tred and green\t赤と緑\t10.5", 3, '\t') == 10.5);

    /* The empty field cases. */
    assert(getTsvFieldValue!string("", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 0, '\t') == "");
    assert(getTsvFieldValue!string("\t", 1, '\t') == "");
    assert(getTsvFieldValue!string("", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 0, ':') == "");
    assert(getTsvFieldValue!string(":", 1, ':') == "");

    /* Tests with different data types. */
    string stringLine = "orange and black\tნარინჯისფერი და შავი\t88.5";
    char[] charLine = "orange and black\tნარინჯისფერი და შავი\t88.5".to!(char[]);
    dchar[] dcharLine = stringLine.to!(dchar[]);
    wchar[] wcharLine = stringLine.to!(wchar[]);

    assert(getTsvFieldValue!string(stringLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(stringLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(stringLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(stringLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(charLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(charLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(charLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(charLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(dcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(dcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(dcharLine, 2, '\t') == 88.5);

    assert(getTsvFieldValue!string(wcharLine, 0, '\t') == "orange and black");
    assert(getTsvFieldValue!string(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი");
    assert(getTsvFieldValue!wstring(wcharLine, 1, '\t') == "ნარინჯისფერი და შავი".to!wstring);
    assert(getTsvFieldValue!double(wcharLine, 2, '\t') == 88.5);

    /* Conversion errors. */
    assertThrown!ConvException(getTsvFieldValue!double("", 0, '\t'));
    assertThrown!ConvException(getTsvFieldValue!double("abc", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("-1", 0, '|'));
    assertThrown!ConvException(getTsvFieldValue!size_t("a23|23.4", 1, '|'));
    assertThrown!ConvException(getTsvFieldValue!double("23.5|def", 1, '|'));

    /* Not enough field errors. These should throw, but not a ConvException.*/
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc", 1, '\t')));
    assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}
1279 
1280 /**
1281 Field-lists - A field-list is a string entered on the command line identifying one or more
1282 field numbers. They are used by the majority of the tsv utility applications. There are
1283 two helper functions, makeFieldListOptionHandler and parseFieldList. Most applications
1284 will use makeFieldListOptionHandler, it creates a delegate that can be passed to
1285 std.getopt to process the command option. Actual processing of the option text is done by
1286 parseFieldList. It can be called directly when the text of the option value contains more
1287 than just the field number.
1288 
1289 Syntax and behavior:
1290 
1291 A 'field-list' is a list of numeric field numbers entered on the command line. Fields are
1292 1-upped integers representing locations in an input line, in the traditional meaning of
1293 Unix command line tools. Fields can be entered as single numbers or a range. Multiple
1294 entries are separated by commas. Some examples (with 'fields' as the command line option):
1295 
1296    --fields 3                 // Single field
1297    --fields 4,1               // Two fields
1298    --fields 3-9               // A range, fields 3 to 9 inclusive
1299    --fields 1,2,7-34,11       // A mix of ranges and fields
1300    --fields 15-5,3-1          // Two ranges in reverse order.
1301 
1302 Incomplete ranges are not supported, for example, '6-'. Zero is disallowed as a field
1303 value by default, but can be enabled to support the notion of zero as representing the
1304 entire line. However, zero cannot be part of a range. Field numbers are one-based by
1305 default, but can be converted to zero-based. If conversion to zero-based is enabled, field
1306 number zero must be disallowed or a signed integer type specified for the returned range.
1307 
1308 An error is thrown if an invalid field specification is encountered. Error text is
1309 intended for display. Error conditions include:
1310   - Empty fields list
1311   - Empty value, e.g. two consecutive commas, a trailing comma, or a leading comma
1312   - String that does not parse as a valid integer
1313   - Negative integers, or zero if zero is disallowed.
1314   - An incomplete range
1315   - Zero used as part of a range.
1316 
1317 No other behaviors are enforced. Repeated values are accepted. If zero is allowed, other
1318 field numbers can be entered as well. Additional restrictions need to be applied by the
1319 caller.
1320 
1321 Notes:
1322   - The data type determines the max field number that can be entered. Enabling conversion
1323     to zero restricts to the signed version of the data type.
1324   - Use 'import std.typecons : Yes, No' to use the convertToZeroBasedIndex and
1325     allowFieldNumZero template parameters.
1326 */
1327 
1328 /** [Yes|No].convertToZeroBasedIndex parameter controls whether field numbers are
1329  *  converted to zero-based indices by makeFieldListOptionHandler and parseFieldList.
1330  */
1331 alias ConvertToZeroBasedIndex = Flag!"convertToZeroBasedIndex";
1332 
1333 /** [Yes|No].allowFieldNumZero parameter controls whether zero is a valid field. This is
1334  *  used by makeFieldListOptionHandler and parseFieldList.
1335  */
1336 alias AllowFieldNumZero = Flag!"allowFieldNumZero";
1337 
1338 alias OptionHandlerDelegate = void delegate(string option, string value);
1339 
/**
makeFieldListOptionHandler creates a std.getopt option handler for processing field
lists entered on the command line. A field list is as defined by parseFieldList.

The returned delegate parses the option value with parseFieldList and appends every
resulting field number to fieldsArray. If parsing fails, the exception is rethrown with
the option name prepended to its message.

Params:
    fieldsArray = The array that receives the parsed field numbers. It is taken by
        reference and is appended to each time the delegate is invoked.

Returns: A delegate suitable for passing to std.getopt as an option handler.
*/
OptionHandlerDelegate makeFieldListOptionHandler(
    T,
    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (ref T[] fieldsArray)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    void fieldListOptionHandler(ref T[] fieldArray, string option, string value) pure @safe
    {
        try
        {
            /* The field-list is parsed lazily, so a parse error may surface after
             * earlier field numbers have already been appended.
             */
            foreach (fieldNum; value.parseFieldList!(T, convertToZero, allowZero))
            {
                fieldArray ~= fieldNum;
            }
        }
        catch (Exception exc)
        {
            import std.format : format;

            /* Identify the offending command line option in the error text. */
            exc.msg = format("[--%s] %s", option, exc.msg);
            throw exc;
        }
    }

    return delegate void(string option, string value)
    {
        fieldListOptionHandler(fieldsArray, option, value);
    };
}
1365 
/* makeFieldListOptionHandler tests. The handlers are exercised through std.getopt, the
 * way applications use them.
 */
unittest
{
    import std.exception : assertThrown;
    import std.getopt;
    import std.meta : AliasSeq;

    /* Defaults: one-based field numbers, zero disallowed. */
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args, "f|fields", fields.makeFieldListOptionHandler);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }
    {
        size_t[] fields;
        auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, Yes.convertToZeroBasedIndex));
        assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0]);
    }
    {
        size_t[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1]);
    }

    /* A field-list option used alongside a built-in getopt array option. */
    {
        size_t[] ints;
        size_t[] fields;
        auto args = ["program", "--ints", "1,2,3", "--fields", "1", "--ints", "4,5,6", "--fields", "2,4,7-9,23-21"];

        /* arraySep is global getopt state. Restore it when done so this test does not
         * change the behavior of later getopt calls.
         */
        auto savedArraySep = std.getopt.arraySep;
        scope(exit) std.getopt.arraySep = savedArraySep;

        std.getopt.arraySep = ",";
        getopt(args,
               "i|ints", "Built-in list of integers.", &ints,
               "f|fields", "Field-list style integers.", fields.makeFieldListOptionHandler);
        assert(ints == [1, 2, 3, 4, 5, 6]);
        assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
    }

    /* Basic cases involving unsigned types smaller than size_t. */
    static foreach (T; AliasSeq!(uint, ushort))
    {{
        T[] fields;
        auto args = ["program", "-f", "0", "-f", "1,0", "-f", "0,1", "-f", "55-58"];
        getopt(args,
               "f|fields", fields.makeFieldListOptionHandler!(T, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assert(fields == [0, 1, 0, 0, 1, 55, 56, 57, 58]);
    }}

    /* Basic cases involving signed types. Signed types allow zero to be combined with
     * conversion to zero-based indices, in which case zero becomes -1.
     */
    static foreach (T; AliasSeq!(long, int, short))
    {{
        {
            T[] fields;
            auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
            getopt(args, "f|fields", fields.makeFieldListOptionHandler);
            assert(fields == [1, 2, 4, 7, 8, 9, 23, 22, 21]);
        }
        {
            T[] fields;
            auto args = ["program", "--fields", "1", "--fields", "2,4,7-9,23-21"];
            getopt(args,
                   "f|fields", fields.makeFieldListOptionHandler!(T, Yes.convertToZeroBasedIndex));
            assert(fields == [0, 1, 3, 6, 7, 8, 22, 21, 20]);
        }
        {
            T[] fields;
            auto args = ["program", "-f", "0"];
            getopt(args,
                   "f|fields", fields.makeFieldListOptionHandler!(T, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
            assert(fields == [-1]);
        }
    }}

    {
        /* Error cases. */
        size_t[] fields;

        /* Invalid field-list values with the default settings. */
        foreach (badValue; ["0", "-1", "a", "1.5", "2-", "3,5,-7", "3,5,"])
        {
            auto args = ["program", "-f", badValue];
            assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));
        }

        /* An option name where the option value should be. */
        {
            auto args = ["program", "-f", "--fields", "1"];
            assertThrown(getopt(args, "f|fields", fields.makeFieldListOptionHandler));
        }

        /* Negative numbers remain invalid when zero is allowed. */
        {
            auto args = ["program", "-f", "-1"];
            assertThrown(getopt(args,
                                "f|fields", fields.makeFieldListOptionHandler!(
                                    size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)));
        }
    }
}
1521 
/**
parseFieldList lazily generates a range of field numbers from a 'field-list' string.

The field-list is split on the delimiter and each entry is handed to parseFieldRange.
Only the first entry is parsed up front; every later entry is parsed when iteration
reaches it.

Params:
    fieldList = The field-list text, e.g. "1,3-5".
    delim = The character separating the entries of the field-list.

Returns: An input range with elements of type T.
*/
auto parseFieldList(T = size_t,
                    ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                    AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldList, char delim = ',')
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : splitter;

    /* Entries of the field-list that have not been parsed yet. */
    auto pendingEntries = fieldList.splitter(delim);

    /* An empty field-list is passed to parseFieldRange as an empty entry. */
    string firstEntry = "";
    if (!pendingEntries.empty) firstEntry = pendingEntries.front;

    /* The field numbers of the entry currently being iterated. */
    auto activeRange = firstEntry.parseFieldRange!(T, convertToZero, allowZero);

    if (!pendingEntries.empty) pendingEntries.popFront;

    struct Result
    {
        @property bool empty() pure nothrow @safe @nogc
        {
            return activeRange.empty;
        }

        @property T front() pure @safe
        {
            import std.conv : to;

            assert(!empty, "Attempting to fetch the front of an empty field-list.");
            assert(!activeRange.empty, "Internal error. Call to front with an empty _currFieldParse.");

            return to!T(activeRange.front);
        }

        void popFront() pure @safe
        {
            assert(!empty, "Attempting to popFront an empty field-list.");

            activeRange.popFront;

            /* Nothing more to do while the current entry still has field numbers, or
             * when there are no further entries to move on to.
             */
            if (!activeRange.empty || pendingEntries.empty) return;

            /* Current entry is used up. Start on the next one. */
            activeRange = pendingEntries.front.parseFieldRange!(T, convertToZero, allowZero);
            pendingEntries.popFront;
        }
    }

    return Result();
}
1572 
@safe unittest
{
    import std.algorithm : each, equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A field-list paired with the field numbers it is expected to expand to. */
    static struct Expansion
    {
        string fieldList;
        int[] fields;
    }

    /* Basic tests. */
    foreach (c; [Expansion("1", [1]),
                 Expansion("1,2", [1, 2]),
                 Expansion("1,2,3", [1, 2, 3]),
                 Expansion("1-2", [1, 2]),
                 Expansion("1-2,6-4", [1, 2, 6, 5, 4]),
                 Expansion("1-2,1,1-2,2,2-1", [1, 2, 1, 1, 2, 2, 2, 1])])
    {
        assert(c.fieldList.parseFieldList.equal(c.fields));
    }

    assert(parseFieldList!size_t("1-2,5").equal([1, 2, 5]));

    /* Signed Int tests */
    foreach (c; [Expansion("1", [1]),
                 Expansion("1,2,3", [1, 2, 3]),
                 Expansion("1-2", [1, 2]),
                 Expansion("1-2,6-4", [1, 2, 6, 5, 4]),
                 Expansion("1-2,5", [1, 2, 5])])
    {
        assert(c.fieldList.parseFieldList!int.equal(c.fields));
    }

    /* Convert to zero tests, for both an unsigned and a signed result type. */
    foreach (c; [Expansion("1", [0]),
                 Expansion("1,2,3", [0, 1, 2]),
                 Expansion("1-2", [0, 1]),
                 Expansion("1-2,6-4", [0, 1, 5, 4, 3]),
                 Expansion("1-2,5", [0, 1, 4])])
    {
        assert(c.fieldList.parseFieldList!(size_t, Yes.convertToZeroBasedIndex).equal(c.fields));
        assert(c.fieldList.parseFieldList!(long, Yes.convertToZeroBasedIndex).equal(c.fields));
    }

    /* Allow zero tests, field numbers left one-based. */
    foreach (c; [Expansion("0", [0]),
                 Expansion("1,0,3", [1, 0, 3]),
                 Expansion("1-2,5", [1, 2, 5])])
    {
        assert(c.fieldList
               .parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(c.fields));
        assert(c.fieldList
               .parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(c.fields));
    }

    /* Allow zero tests, converted to zero-based. Field zero becomes -1. */
    foreach (c; [Expansion("0", [-1]),
                 Expansion("1,0,3", [0, -1, 2]),
                 Expansion("1-2,5", [0, 1, 4])])
    {
        assert(c.fieldList
               .parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(c.fields));
    }

    /* Error cases: malformed lists, and zero when zero is not allowed. */
    foreach (bad; ["", " ", ",", "5 6", ",7", "8,", "8,9,", "10,,11", "0", "1,0,3"])
    {
        assertThrown(bad.parseFieldList.each);
    }

    foreach (bad; ["", "1,2-3,", "2-,4"])
    {
        assertThrown(bad.parseFieldList!(long, Yes.convertToZeroBasedIndex).each);
    }

    foreach (bad; ["1,2,3,,4", ",7", "8,", "10,0,,11"])
    {
        assertThrown(bad.parseFieldList!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    }

    foreach (bad; ["0", "1,0,3"])
    {
        assertThrown(bad.parseFieldList!(int, Yes.convertToZeroBasedIndex, No.allowFieldNumZero).each);
    }

    /* Error cases: zero used inside a range, and a trailing delimiter. */
    foreach (bad; ["8,9,", "0-2,6-0"])
    {
        assertThrown(bad.parseFieldList!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    }

    assertThrown("0-2,6-0".parseFieldList!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
    assertThrown("0-2,6-0".parseFieldList!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero).each);
}
1644 
/* parseFieldRange parses a single number or number range. E.g. '5' or '5-8'. These are
 * the values in a field-list separated by a comma or other delimiter. It returns a range
 * that iterates over all the values in the range.
 *
 * A range may be written in either direction: '5-8' yields 5, 6, 7, 8 and '8-5' yields
 * 8, 7, 6, 5.
 *
 * Template parameters:
 *   T             - Integral type the field numbers are parsed as.
 *   convertToZero - Yes: subtract one from every value (one-based to zero-based). The
 *                   values are then parsed as the signed counterpart of T.
 *   allowZero     - Yes: accept zero as a standalone field number. Zero is never
 *                   accepted as the start or end of a range.
 *
 * Throws an Exception when the text is empty, is an incomplete range ('-5', '5-'),
 * uses zero as part of a range, or contains a field number below the permitted minimum
 * (1, or 0 with allowZero). Text that cannot be converted to the integral type is
 * rejected by std.conv.to with its own exception.
 */
private auto parseFieldRange(T = size_t,
                             ConvertToZeroBasedIndex convertToZero = No.convertToZeroBasedIndex,
                             AllowFieldNumZero allowZero = No.allowFieldNumZero)
    (string fieldRange)
if (isIntegral!T && (!allowZero || !convertToZero || !isUnsigned!T))
{
    import std.algorithm : findSplit;
    import std.conv : to;
    import std.format : format;
    import std.range : iota;
    import std.traits : Signed;

    /* Pick the largest compatible integral type for the IOTA range. This must be the
     * signed type if convertToZero is true, as a reverse order range may end at -1.
     */
    static if (convertToZero) alias S = Signed!T;
    else alias S = T;

    if (fieldRange.length == 0) throw new Exception("Empty field number.");

    auto rangeSplit = findSplit(fieldRange, "-");

    /* A dash was found, so the text is a range rather than a single number. */
    immutable bool isRange = !rangeSplit[1].empty;

    if (isRange && (rangeSplit[0].empty || rangeSplit[2].empty))
    {
        // Range starts or ends with a dash.
        throw new Exception(format("Incomplete ranges are not supported: '%s'", fieldRange));
    }

    S start = rangeSplit[0].to!S;
    S last = isRange ? rangeSplit[2].to!S : start;
    Signed!T increment = (start <= last) ? 1 : -1;

    static if (allowZero)
    {
        /* Zero is only meaningful as a standalone entry. Reject it at either end of
         * a range: checking only the start would let a reverse range such as '3-0'
         * through.
         */
        if (isRange && (start == 0 || last == 0))
        {
            throw new Exception(format("Zero cannot be used as part of a range: '%s'", fieldRange));
        }
    }

    static if (allowZero)
    {
        if (start < 0 || last < 0)
        {
            throw new Exception(format("Field numbers must be non-negative integers: '%d'",
                                       (start < 0) ? start : last));
        }
    }
    else
    {
        if (start < 1 || last < 1)
        {
            throw new Exception(format("Field numbers must be greater than zero: '%d'",
                                       (start < 1) ? start : last));
        }
    }

    static if (convertToZero)
    {
        start--;
        last--;
    }

    /* iota excludes its end value, so step one past 'last' in the iteration direction. */
    return iota(start, last + increment, increment);
}
1714 
@safe unittest // parseFieldRange
{
    import std.algorithm : equal;
    import std.exception : assertThrown, assertNotThrown;

    /* A field-range string paired with the values it is expected to expand to. */
    static struct Expansion
    {
        string fieldRange;
        int[] values;
    }

    int[] tenDownToOne = [10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
    int[] nineDownToZero = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0];

    /* Basic cases */
    foreach (c; [Expansion("1", [1]),
                 Expansion("2", [2]),
                 Expansion("3-4", [3, 4]),
                 Expansion("3-5", [3, 4, 5]),
                 Expansion("4-3", [4, 3]),
                 Expansion("10-1", tenDownToOne)])
    {
        assert(c.fieldRange.parseFieldRange.equal(c.values));
    }

    /* Convert to zero-based indices */
    foreach (c; [Expansion("1", [0]),
                 Expansion("2", [1]),
                 Expansion("3-4", [2, 3]),
                 Expansion("3-5", [2, 3, 4]),
                 Expansion("4-3", [3, 2]),
                 Expansion("10-1", nineDownToZero)])
    {
        assert(c.fieldRange.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex).equal(c.values));
    }

    /* Allow zero. */
    foreach (c; [Expansion("0", [0]),
                 Expansion("1", [1]),
                 Expansion("3-4", [3, 4]),
                 Expansion("10-1", tenDownToOne)])
    {
        assert(c.fieldRange
               .parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(c.values));
    }

    /* Allow zero, convert to zero-based index. */
    foreach (c; [Expansion("0", [-1]),
                 Expansion("1", [0]),
                 Expansion("3-4", [2, 3]),
                 Expansion("10-1", nineDownToZero)])
    {
        assert(c.fieldRange
               .parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)
               .equal(c.values));
    }

    /* Alternate integer types. */
    foreach (c; [Expansion("2", [2]),
                 Expansion("3-5", [3, 4, 5]),
                 Expansion("10-1", tenDownToOne)])
    {
        assert(c.fieldRange.parseFieldRange!uint.equal(c.values));
        assert(c.fieldRange.parseFieldRange!int.equal(c.values));
        assert(c.fieldRange.parseFieldRange!ushort.equal(c.values));
        assert(c.fieldRange.parseFieldRange!short.equal(c.values));
    }

    /* Standalone zero across integer types. */
    assert(parseFieldRange!(long, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([0]));
    assert(parseFieldRange!(uint, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([0]));
    assert(parseFieldRange!(int, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([0]));
    assert(parseFieldRange!(ushort, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([0]));
    assert(parseFieldRange!(short, No.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([0]));
    assert(parseFieldRange!(int, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([-1]));
    assert(parseFieldRange!(short, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero)("0").equal([-1]));

    /* Max field value cases. */
    foreach (c; [Expansion("65535", [65535]),                        // ushort max
                 Expansion("65533-65535", [65533, 65534, 65535])])
    {
        assert(c.fieldRange.parseFieldRange!ushort.equal(c.values));
    }

    foreach (c; [Expansion("32767", [32767]),                        // short max
                 Expansion("32765-32767", [32765, 32766, 32767])])
    {
        assert(c.fieldRange.parseFieldRange!short.equal(c.values));
    }

    assert(parseFieldRange!(short, Yes.convertToZeroBasedIndex)("32767").equal([32766]));

    /* Error cases. */
    foreach (bad; ["", " ", "-", " -", "- ", "1-", "-2", "-1", "1.0", "0", "0-3", "-2-4",
                   "2--4", "2-", "a", "0x3", "3U", "1_000", "."])
    {
        assertThrown(bad.parseFieldRange);
    }

    /* Error cases shared by the non-default flag combinations. */
    foreach (bad; ["", " ", "-", "1-", "-2", "-1", "0-3", "-2-4"])
    {
        assertThrown(bad.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
        assertThrown(bad.parseFieldRange!(size_t, No.convertToZeroBasedIndex, Yes.allowFieldNumZero));
        assertThrown(bad.parseFieldRange!(long, Yes.convertToZeroBasedIndex, Yes.allowFieldNumZero));
    }

    foreach (bad; ["0", "2--4"])
    {
        assertThrown(bad.parseFieldRange!(size_t, Yes.convertToZeroBasedIndex));
    }

    /* Value out of range cases. */
    foreach (bad; ["65536", "65535-65536"])   // One more than ushort max.
    {
        assertThrown(bad.parseFieldRange!ushort);
    }

    foreach (bad; ["32768", "32765-32768"])   // One more than short max.
    {
        assertThrown(bad.parseFieldRange!short);
    }

    // Convert to zero limits signed range.
    assertThrown(parseFieldRange!(ushort, Yes.convertToZeroBasedIndex)("32768"));
    assert(parseFieldRange!(ushort, Yes.convertToZeroBasedIndex)("32767").equal([32766]));
}
1836 
/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
 *  Yes means the Unix newline ('\n') has already been stripped from the line, as is
 *  done by std.stdio.File.byLine and similar mechanisms. No means the line still
 *  ends with its newline.
 */
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsNewlineOnUnix throws an exception if a line has a Windows/DOS line
ending (carriage return + newline) and the build targets a Posix platform. The TSV
Utilities use it to detect Windows/DOS line endings and stop processing with an
error message for the user. On non-Posix builds the function does nothing.

Params:
    nlWasRemoved = Yes if the trailing '\n' was already removed from `line`. The line
        is then flagged when it ends in '\r'. With No, it is flagged when it ends
        in "\r\n".
    line = The input line to examine.
    filename = Name of the file the line came from, used in the error message. The
        name "-" is reported as "Standard Input".
    lineNum = Line number reported in the error message.

Throws: Exception when a Windows/DOS line ending is found (Posix builds only).
 */
void throwIfWindowsNewlineOnUnix
    (NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
    (const char[] line, const char[] filename, size_t lineNum)
{
    version(Posix)
    {
        /* Number of trailing characters that make up the line ending being looked
         * for: just '\r' when the newline is already gone, "\r\n" otherwise.
         */
        static if (nlWasRemoved) enum size_t endingLength = 1;
        else enum size_t endingLength = 2;

        /* Too short to hold the line ending. */
        if (line.length < endingLength) return;

        static if (!nlWasRemoved)
        {
            if (line[$ - 1] != '\n') return;
        }

        if (line[$ - endingLength] != '\r') return;

        import std.format : format;

        const displayName = (filename == "-") ? "Standard Input" : filename;

        throw new Exception(
            format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n  File: %s, Line: %s",
                   displayName, lineNum));
    }
}
1876 
@safe unittest
{
    /* Note: Currently only building on Posix. Need to add non-Posix test cases
     * if Windows builds are ever done.
     */
    version(Posix)
    {
        import std.algorithm : endsWith;
        import std.exception;

        /* Line bodies of increasing length. Each is checked with a clean ending and
         * with a Windows/DOS ending, both with and without the newline still attached.
         */
        foreach (i, text; ["", "a", "ab", "abc"])
        {
            immutable size_t lineNum = i + 1;

            assertNotThrown(throwIfWindowsNewlineOnUnix(text, "afile.tsv", lineNum));
            assertThrown(throwIfWindowsNewlineOnUnix(text ~ "\r", "afile.tsv", lineNum));

            assertNotThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text ~ "\n", "afile.tsv", lineNum));
            assertThrown(
                throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)(text ~ "\r\n", "afile.tsv", lineNum + 4));
        }

        /* Standard Input formatting. collectExceptionMsg yields null when nothing is
         * thrown, so each assert also fails if the exception is missing.
         */
        immutable expectedTail = "File: Standard Input, Line: 99";

        assert(collectExceptionMsg(throwIfWindowsNewlineOnUnix("\r", "-", 99))
               .endsWith(expectedTail));

        assert(collectExceptionMsg(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99))
               .endsWith(expectedTail));
    }
}