1 /**
2 Command line tool that filters TSV files.
3 
4 This tool filters tab-delimited files based on numeric or string comparisons
5 against specific fields. See the helpText string for details.
6 
7 Copyright (c) 2015-2020, eBay Inc.
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.tsv_filter;
13 
14 import std.algorithm : canFind, equal, findSplit, max, min;
15 import std.conv : to;
16 import std.format : format;
17 import std.math : abs, isFinite, isInfinity, isNaN;
18 import std.range : walkLength;
19 import std.regex;
20 import std.stdio;
21 import std.string : isNumeric;
22 import std.typecons : tuple;
23 import std.uni: asLowerCase, toLower, byGrapheme;
24 
25 /* The program has two main parts, command line arg processing and processing the input
26  * files. Much of the work is in command line arg processing. This sets up the tests run
27  * against each input line. The tests are an array of delegates (closures) run against the
28  * fields in the line. The tests are based on command line arguments, of which there is
29  * a lengthy set, one for each test.
30  */
31 
/* D runtime options compiled into the executable. "gcopt=cleanup:none" is a garbage
 * collector setting; per the D GC configuration documentation it skips the collection
 * normally run at program termination. The declaration is only compiled in for
 * compiler front-end versions 2.085 and later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
33 
/** Program entry point.
 *
 * Runs command line argument processing, then hands the options and the remaining
 * arguments (everything after the program name) to tsvFilter. Exceptions thrown by
 * tsvFilter are reported on stderr.
 *
 * Returns: The exit code supplied by processArgs when it signals that execution
 * should stop, 1 if tsvFilter throws an Exception, 0 otherwise.
 */
int main(string[] cmdArgs)
{
    /* Merge coverage reports across runs when built by DMD in code coverage mode. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    TsvFilterOptions cmdopt;
    const argsResult = cmdopt.processArgs(cmdArgs);

    /* First tuple member: continue or not. Second member: exit code to use if not. */
    const shouldContinue = argsResult[0];
    if (!shouldContinue)
    {
        return argsResult[1];
    }

    version(LDC_Profile)
    {
        import ldc.profile : resetAll;
        resetAll();
    }

    int exitCode = 0;
    try
    {
        tsvFilter(cmdopt, cmdArgs[1..$]);
    }
    catch (Exception exc)
    {
        stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg);
        exitCode = 1;
    }
    return exitCode;
}
62 
/* Brief help text. Summarizes the global options and the test operators, with one
 * syntax line and one example per operator family. Every example uses an operator
 * that appears in the corresponding syntax line.
 */
immutable helpText = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter tab-delimited files for matching lines via comparison tests against
individual fields. Use '--help-verbose' for a more detailed description.

Global options:
  --help-verbose      Print full help.
  --help-options      Print the options list by itself.
  --V|version         Print version information and exit.
  --H|header          Treat the first line of each file as a header.
  --or                Evaluate tests as an OR rather than an AND clause.
  --v|invert          Invert the filter, printing lines that do not match.
  --d|delimiter CHR   Field delimiter. Default: TAB.

Operators:
* Test if a field is empty (no characters) or blank (empty or whitespace only).
  Syntax:  --empty|not-empty|blank|not-blank  FIELD
  Example: --empty 5          // True if field 5 is empty

* Test if a field is numeric, finite, NaN, or infinity
  Syntax:  --is-numeric|is-finite|is-nan|is-infinity FIELD
  Example: --is-numeric 5 --gt 5:100  // Ensure field 5 is numeric before --gt test.

* Compare a field to a number (integer or float)
  Syntax:  --eq|ne|lt|le|gt|ge  FIELD:NUM
  Example: --lt 5:1000 --gt 2:0.5  // True if (field 5 < 1000) and (field 2 > 0.5)

* Compare a field to a string
  Syntax:  --str-eq|str-ne  FIELD:STR
  Example: --str-eq 3:abc        // True if field 3 is "abc"

* Test if a field contains a string (substring search)
  Syntax:  --str-in-fld|str-not-in-fld|istr-in-fld|istr-not-in-fld  FIELD:STR
  Example: --str-in-fld 1:hello  // True if field 1 contains "hello"

* Test if a field matches a regular expression.
  Syntax:  --regex|iregex|not-regex|not-iregex  FIELD:REGEX
  Example: --regex '3:ab*c'      // True if field 3 contains "ac", "abc", "abbc", etc.

* Test a field's character or byte length
  Syntax:  --char-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
           --byte-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
  Example: --char-len-lt 2:10    // True if field 2 is less than 10 characters long.
           --byte-len-gt 2:10    // True if field 2 is greater than 10 bytes long.

* Field to field comparisons - Similar to field vs literal comparisons, but field vs field.
  Syntax:  --ff-eq|ff-ne|ff-lt|ff-le|ff-gt|ff-ge  FIELD1:FIELD2
           --ff-str-eq|ff-str-ne|ff-istr-eq|ff-istr-ne  FIELD1:FIELD2
  Example: --ff-eq 2:4           // True if fields 2 and 4 are numerically equivalent
           --ff-str-eq 2:4       // True if fields 2 and 4 are the same strings

* Field to field difference comparisons - Absolute and relative difference
  Syntax:  --ff-absdiff-le|ff-absdiff-gt FIELD1:FIELD2:NUM
           --ff-reldiff-le|ff-reldiff-gt FIELD1:FIELD2:NUM
  Example: --ff-absdiff-le 1:3:0.25   // True if abs(field 1 - field 3) <= 0.25

EOS";
121 
/* Verbose help text. Describes the general '--op field:value' test syntax, field
 * lists, the categories of tests available, and error behavior. The text ends with
 * an "Options:" heading, so the option list itself is presumably appended by the
 * code that prints it (not visible in this part of the file).
 */
immutable helpTextVerbose = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter lines of tab-delimited files via comparison tests against fields. Multiple
tests can be specified, by default they are evaluated as AND clause. Lines
satisfying the tests are written to standard output.

Typical test syntax is '--op field:value', where 'op' is an operator, 'field' is a
1-based field index, and 'value' is the comparison basis. For example, '--lt 3:500'
tests if field 3 is less than 500. A more complete example:

  tsv-filter --header --gt 1:50 --lt 1:100 --le 2:1000 data.tsv

This outputs all lines from file data.tsv where field 1 is greater than 50 and less
than 100, and field 2 is less than or equal to 1000. The header is also output.

Field lists can be used to specify multiple fields at once. For example:

  tsv-filter --not-blank 1-10 --str-ne 1,2,5:'--' data.tsv

tests that fields 1-10 are not blank and fields 1,2,5 are not "--".

Tests available include:
  * Test if a field is empty (no characters) or blank (empty or whitespace only).
  * Test if a field is interpretable as a number, a finite number, NaN, or Infinity.
  * Compare a field to a number - Numeric equality and relational tests.
  * Compare a field to a string - String equality and relational tests.
  * Test if a field matches a regular expression. Case sensitive or insensitive.
  * Test if a field contains a string. Sub-string search, case sensitive or insensitive.
  * Test a field's character or byte length.
  * Field to field comparisons - Similar to the other tests, except comparing
    one field to another in the same line.

Details:
  * The run is aborted if there are not enough fields in an input line.
  * Numeric tests will fail and abort the run if a field cannot be interpreted as a
    number. This includes fields with no text. To avoid this use '--is-numeric' or
    '--is-finite' prior to the numeric test. For example, '--is-numeric 5 --gt 5:100'
    ensures field 5 is numeric before running the --gt test.
  * Regular expression syntax is defined by the D programming language. They follow
    common conventions (perl, python, etc.). Most common forms work as expected.

Options:
EOS";
166 
/* Options-only help text: just the synopsis line and an "Options:" heading.
 * Presumably used for '--help-options', with the option list appended by the code
 * that prints it - TODO confirm against the caller.
 */
immutable helpTextOptions = q"EOS
Synopsis: tsv-filter [options] [file...]

Options:
EOS";
172 
/* The next blocks of code define the structure of the boolean tests run against input lines.
 * This includes function and delegate (closure) signatures, creation mechanisms, option
 * handlers, etc. Command line arg processing uses these to build the test structure.
*/
177 
178 /* FieldsPredicate delegate signature - Each input line is run against a set of boolean
179  * tests. Each test is a 'FieldsPredicate'. A FieldsPredicate is a delegate (closure)
180  * containing all info about the test except the field values of the line being tested.
181  * These delegates are created as part of command line arg processing. The wrapped data
182  * includes operation, field indexes, literal values, etc. At run-time the delegate is
183  * passed one argument, the split input line.
184  */
// Argument: the fields of the line being tested. Result: the boolean outcome of the test.
alias FieldsPredicate = bool delegate(const char[][] fields);
186 
187 /* FieldsPredicate function signatures - These aliases represent the different function
188  * signatures used in FieldsPredicate delegates. Each alias has a corresponding 'make'
189  * function. The 'make' function takes a real predicate function and closure args and
190  * returns a FieldsPredicate delegate. Predicates types are:
191  *
192  * - FieldUnaryPredicate - Test based on a single field. (e.g. --empty 4)
193  * - FieldVsNumberPredicate - Test based on a field index (used to get the field value)
194  *   and a fixed numeric value. For example, field 2 less than 100 (--lt 2:100).
195  * - FieldVsStringPredicate - Test based on a field and a string. (e.g. --str-eq 2:abc)
196  * - FieldVsIStringPredicate - Case-insensitive test based on a field and a string.
197  *   (e.g. --istr-eq 2:abc)
198  * - FieldVsRegexPredicate - Test based on a field and a regex. (e.g. --regex '2:ab*c')
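 * - FieldVsFieldPredicate - Test based on two fields. (e.g. --ff-le 2:4).
 * - FieldFieldNumPredicate - Test based on two fields and a fixed numeric value.
 *   (e.g. --ff-absdiff-le 1:3:0.25)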
200  *
201  * An actual FieldsPredicate takes the fields from the line and the closure args and
202  * runs the test. For example, a function testing if a field is less than a specific
203  * value would pull the specified field from the fields array, convert the string to
204  * a number, then run the less-than test.
205  */
// In all signatures 'fields' is the split input line and the index arguments select
// entries of 'fields'.
alias FieldUnaryPredicate    = bool function(const char[][] fields, size_t index);
alias FieldVsNumberPredicate = bool function(const char[][] fields, size_t index, double value);
alias FieldVsStringPredicate = bool function(const char[][] fields, size_t index, string value);
alias FieldVsIStringPredicate = bool function(const char[][] fields, size_t index, dstring value);
alias FieldVsRegexPredicate  = bool function(const char[][] fields, size_t index, Regex!char value);
alias FieldVsFieldPredicate  = bool function(const char[][] fields, size_t index1, size_t index2);
// Two fields plus a numeric value; used by the absolute/relative difference predicates.
alias FieldFieldNumPredicate  = bool function(const char[][] fields, size_t index1, size_t index2, double value);
213 
/* Wraps a single-field predicate and its field index in a FieldsPredicate delegate. */
FieldsPredicate makeFieldUnaryDelegate(FieldUnaryPredicate fn, size_t index)
{
    return delegate bool(const char[][] fields) { return fn(fields, index); };
}
218 
/* Wraps a field-vs-number predicate, field index, and number in a FieldsPredicate delegate. */
FieldsPredicate makeFieldVsNumberDelegate(FieldVsNumberPredicate fn, size_t index, double value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}
223 
/* Wraps a field-vs-string predicate, field index, and string in a FieldsPredicate delegate. */
FieldsPredicate makeFieldVsStringDelegate(FieldVsStringPredicate fn, size_t index, string value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}
228 
/* Wraps a case-insensitive string predicate, field index, and dstring value in a
 * FieldsPredicate delegate.
 */
FieldsPredicate makeFieldVsIStringDelegate(FieldVsIStringPredicate fn, size_t index, dstring value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}
233 
/* Wraps a regex predicate, field index, and compiled regex in a FieldsPredicate delegate. */
FieldsPredicate makeFieldVsRegexDelegate(FieldVsRegexPredicate fn, size_t index, Regex!char value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}
238 
/* Wraps a two-field predicate and both field indexes in a FieldsPredicate delegate. */
FieldsPredicate makeFieldVsFieldDelegate(FieldVsFieldPredicate fn, size_t index1, size_t index2)
{
    return delegate bool(const char[][] fields) { return fn(fields, index1, index2); };
}
243 
/* Wraps a two-field-plus-number predicate, both field indexes, and the number in a
 * FieldsPredicate delegate.
 */
FieldsPredicate makeFieldFieldNumDelegate(FieldFieldNumPredicate fn, size_t index1, size_t index2, double value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index1, index2, value); };
}
248 
249 /* Predicate functions - These are the actual functions used in a FieldsPredicate. They
250  * are a direct reflection of the operators available via command line args. Each matches
251  * one of the FieldsPredicate function aliases defined above.
252  */
/* True if the field has no characters at all. */
bool fldEmpty(const char[][] fields, size_t index)
{
    const field = fields[index];
    return field.length == 0;
}

/* True if the field has at least one character. */
bool fldNotEmpty(const char[][] fields, size_t index)
{
    const field = fields[index];
    return field.length != 0;
}

/* True if the field is empty or consists only of whitespace. */
bool fldBlank(const char[][] fields, size_t index)
{
    auto captures = matchFirst(fields[index], ctRegex!`^\s*$`);
    return !captures.empty;
}

/* True if the field contains at least one non-whitespace character. */
bool fldNotBlank(const char[][] fields, size_t index)
{
    auto captures = matchFirst(fields[index], ctRegex!`^\s*$`);
    return captures.empty;
}
257 
/* True if std.string.isNumeric accepts the field. Note that isNumeric accepts some
 * forms (e.g. type suffixes such as "123L") that 'to!double' does not parse.
 */
bool fldIsNumeric(const char[][] fields, size_t index) { return fields[index].isNumeric; }

/* Converts a field to a double for the classification predicates below.
 *
 * Returns false, rather than throwing, both when isNumeric rejects the field and when
 * isNumeric accepts it but 'to!double' cannot convert it. On success 'value' holds the
 * converted number.
 */
private bool tryFieldToDouble(const char[] field, out double value)
{
    import std.conv : ConvException;

    if (!field.isNumeric) return false;

    try
    {
        value = field.to!double;
    }
    catch (ConvException)
    {
        return false;
    }
    return true;
}

/* True if the field converts to a finite number (not NaN, not infinity). */
bool fldIsFinite(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isFinite;
}

/* True if the field converts to NaN. */
bool fldIsNaN(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isNaN;
}

/* True if the field converts to positive or negative infinity. */
bool fldIsInfinity(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isInfinity;
}
262 
/* Numeric comparisons of a field against a fixed value. The field is converted with
 * 'to!double'; a field that cannot be converted causes the conversion to throw.
 */
bool numLE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue <= val;
}

bool numLT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue < val;
}

bool numGE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue >= val;
}

bool numGT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue > val;
}

bool numEQ(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue == val;
}

bool numNE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue != val;
}
269 
/* String comparisons of a field against a fixed string, using D's built-in array
 * comparison operators, plus case-sensitive substring tests.
 */
bool strLE(const char[][] fields, size_t index, string val)
{
    return val >= fields[index];
}

bool strLT(const char[][] fields, size_t index, string val)
{
    return val > fields[index];
}

bool strGE(const char[][] fields, size_t index, string val)
{
    return val <= fields[index];
}

bool strGT(const char[][] fields, size_t index, string val)
{
    return val < fields[index];
}

bool strEQ(const char[][] fields, size_t index, string val)
{
    return val == fields[index];
}

bool strNE(const char[][] fields, size_t index, string val)
{
    return val != fields[index];
}

bool strInFld(const char[][] fields, size_t index, string val)
{
    return canFind(fields[index], val);
}

bool strNotInFld(const char[][] fields, size_t index, string val)
{
    return !canFind(fields[index], val);
}
278 
/* Note: For istr predicates, the command line value has been lower-cased by fieldVsIStringOptionHandler.
 */
/* Case-insensitive string tests. The field is lower-cased lazily with asLowerCase and
 * compared against 'val' as given; 'val' itself is not case-converted here.
 */
bool istrEQ(const char[][] fields, size_t index, dstring val)
{
    return equal(fields[index].asLowerCase, val);
}

bool istrNE(const char[][] fields, size_t index, dstring val)
{
    return !equal(fields[index].asLowerCase, val);
}

bool istrInFld(const char[][] fields, size_t index, dstring val)
{
    return canFind(fields[index].asLowerCase, val);
}

bool istrNotInFld(const char[][] fields, size_t index, dstring val)
{
    return !canFind(fields[index].asLowerCase, val);
}
285 
286 /* Note: Case-sensitivity is built into the regex value, so these regex predicates are
287  * used for both case-sensitive and case-insensitive regex operators.
288  */
/* True if the regex matches somewhere in the field. */
bool regexMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto captures = matchFirst(fields[index], val);
    return !captures.empty;
}

/* True if the regex matches nowhere in the field. */
bool regexNotMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto captures = matchFirst(fields[index], val);
    return captures.empty;
}
291 
/* Character length tests. The length is the number of graphemes in the field
 * (byGrapheme.walkLength), compared against a numeric value.
 */
bool charLenLE(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount <= val;
}

bool charLenLT(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount < val;
}

bool charLenGE(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount >= val;
}

bool charLenGT(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount > val;
}

bool charLenEQ(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount == val;
}

bool charLenNE(const char[][] fields, size_t index, double val)
{
    immutable graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount != val;
}
298 
/* Byte length tests. The length is the field's array length (number of chars),
 * compared against a numeric value.
 */
bool byteLenLE(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount <= val;
}

bool byteLenLT(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount < val;
}

bool byteLenGE(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount >= val;
}

bool byteLenGT(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount > val;
}

bool byteLenEQ(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount == val;
}

bool byteLenNE(const char[][] fields, size_t index, double val)
{
    immutable byteCount = fields[index].length;
    return byteCount != val;
}
305 
/* Field-to-field numeric comparisons. Both fields are converted with 'to!double',
 * first field first; a field that cannot be converted causes the conversion to throw.
 */
bool ffLE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs <= rhs;
}

bool ffLT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs < rhs;
}

bool ffGE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs >= rhs;
}

bool ffGT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs > rhs;
}

bool ffEQ(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs == rhs;
}

bool ffNE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs != rhs;
}
/* Field-to-field string comparisons: exact (ffStr*) and case-insensitive (ffIStr*).
 * The case-insensitive forms compare the lazily lower-cased forms of both fields.
 */
bool ffStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    const first = fields[index1];
    const second = fields[index2];
    return first == second;
}

bool ffStrNE(const char[][] fields, size_t index1, size_t index2)
{
    const first = fields[index1];
    const second = fields[index2];
    return first != second;
}

bool ffIStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    return fields[index1].asLowerCase.equal(fields[index2].asLowerCase);
}

bool ffIStrNE(const char[][] fields, size_t index1, size_t index2)
{
    return !fields[index1].asLowerCase.equal(fields[index2].asLowerCase);
}
322 
/* Absolute difference: |v1 - v2|. */
auto AbsDiff(double v1, double v2)
{
    return abs(v1 - v2);
}

/* Relative difference: |v1 - v2| divided by the smaller of |v1| and |v2|. */
auto RelDiff(double v1, double v2)
{
    immutable double smallerMagnitude = min(abs(v1), abs(v2));
    return abs(v1 - v2) / smallerMagnitude;
}

/* Field-to-field difference predicates. Both fields are converted with 'to!double'
 * and their absolute or relative difference is compared against 'value'.
 */
bool ffAbsDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return AbsDiff(lhs, rhs) <= value;
}

bool ffAbsDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return AbsDiff(lhs, rhs) > value;
}

bool ffRelDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return RelDiff(lhs, rhs) <= value;
}

bool ffRelDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return RelDiff(lhs, rhs) > value;
}
342 
343 /* Command line option handlers - There is a command line option handler for each
344  * predicate type. That is, one each for FieldUnaryPredicate, FieldVsNumberPredicate,
345  * etc. Option handlers are passed the tests array, the predicate function, and the
346  * command line option arguments. A FieldsPredicate delegate is created and appended to
347  * the tests array. An exception is thrown if errors are detected while processing the
348  * option, the error text is intended for the end user.
349  *
350  * These option handlers have similar functionality, differing in option processing and
351  * error message generation. fieldVsNumberOptionHandler is described as an example. It
 * handles command options such as '--lt 3:1000', which tests field 3 for a value less
 * than 1000. It is passed the tests array, the 'numLT' function to use for the test, and
 * the string "3:1000" representing the option value. It parses the option value into
 * field index (size_t) and value (double). These are wrapped in a FieldsPredicate
356  * which is added to the tests array. An error is signaled if the option string is invalid.
357  *
358  * During processing, fields indexes are converted from one-based to zero-based. As an
359  * optimization, the maximum field index is also tracked. This allows early termination of
360  * line splitting.
361  */
/* Option handler for single-field tests (FieldUnaryPredicate).
 *
 * 'optionVal' is parsed as a field list. One test delegate is appended to 'tests' for
 * each field in the list, and 'maxFieldIndex' is raised to the largest zero-based
 * index seen. If an exception is raised while processing the list, its message is
 * rewritten to include the option text and expected syntax, then it is rethrown.
 */
void fieldUnaryOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldUnaryPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    try
    {
        auto zeroBasedFields = optionVal.parseFieldList!(size_t, Yes.convertToZeroBasedIndex);
        foreach (zeroBasedIndex; zeroBasedFields)
        {
            tests ~= makeFieldUnaryDelegate(fn, zeroBasedIndex);
            if (zeroBasedIndex > maxFieldIndex) maxFieldIndex = zeroBasedIndex;
        }
    }
    catch (Exception exc)
    {
        exc.msg = format("[--%s %s]. %s\n   Expected: '--%s <field>' or '--%s <field-list>'.",
                         option, optionVal, exc.msg, option, option);
        throw exc;
    }
}
383 
/* Option handler for field-vs-number tests (FieldVsNumberPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>'. It is split at the first ':'; the part
 * after it is converted to a double and the part before it is parsed as a field list.
 * One test delegate is appended to 'tests' for each field, and 'maxFieldIndex' is
 * raised to the largest zero-based index seen.
 *
 * Throws: Exception if the ':' separator or the value is missing, or if the value is
 * not a number. Exceptions raised while processing the field list are rethrown with
 * the option text and expected syntax added to the message.
 */
void fieldVsNumberOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsNumberPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    /* Builds the message used when the option value itself is malformed. */
    auto formatErrorMsg(string option, string optionVal, string errorMessage="")
    {
        string optionalSpace = (errorMessage.length == 0) ? "" : " ";
        return format(
            "Invalid option: '--%s %s'.%s%s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, optionalSpace, errorMessage, option, option);
    }

    immutable valSplit = findSplit(optionVal, ":");

    /* valSplit[1] is the separator (empty if no ':' was found); valSplit[2] is the value. */
    if (valSplit[1].length == 0 || valSplit[2].length == 0)
    {
        throw new Exception(formatErrorMsg(option, optionVal));
    }

    double value;
    try value = valSplit[2].to!double;
    catch (Exception e)
    {
        throw new Exception(formatErrorMsg(option, optionVal, e.msg));
    }

    try
    {
        foreach (fieldIndex; valSplit[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsNumberDelegate(fn, fieldIndex, value);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
430 
/* Option handler for field-vs-string tests (FieldVsStringPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>' and is split at the first ':'. One test
 * delegate is appended to 'tests' for each field in the list, and 'maxFieldIndex' is
 * raised to the largest zero-based index seen.
 *
 * Throws: Exception if the ':' separator or the value is missing. Exceptions raised
 * while processing the field list are rethrown with the option text and expected
 * syntax added to the message.
 */
void fieldVsStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsStringPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasValue = parts[2].length != 0;

    if (!(hasSeparator && hasValue))
    {
        throw new Exception(
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
                   option, optionVal, option, option));
    }

    string value = parts[2];

    try
    {
        auto zeroBasedFields = parts[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex);
        foreach (zeroBasedIndex; zeroBasedFields)
        {
            tests ~= makeFieldVsStringDelegate(fn, zeroBasedIndex, value);
            if (zeroBasedIndex > maxFieldIndex) maxFieldIndex = zeroBasedIndex;
        }
    }
    catch (Exception exc)
    {
        exc.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, exc.msg, option, option);
        throw exc;
    }
}
463 
/* Option handler for case-insensitive field-vs-string tests (FieldVsIStringPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>' and is split at the first ':'. The value
 * is converted to a dstring and lower-cased, on the assumption that the predicate does
 * its comparison against lower-cased field text. The conversion is done once and the
 * resulting immutable string is shared by every delegate created for the field list.
 * One test delegate is appended to 'tests' for each field, and 'maxFieldIndex' is
 * raised to the largest zero-based index seen.
 *
 * Throws: Exception if the ':' separator or the value is missing. Exceptions raised
 * while processing the field list are rethrown with the option text and expected
 * syntax added to the message.
 */
void fieldVsIStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsIStringPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable valSplit = findSplit(optionVal, ":");

    /* valSplit[1] is the separator (empty if no ':' was found); valSplit[2] is the value. */
    if (valSplit[1].length == 0 || valSplit[2].length == 0)
    {
        throw new Exception(
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
                   option, optionVal, option, option));
    }

    /* Loop invariant: lower-case the comparison value once, not once per field. */
    immutable dstring loweredValue = valSplit[2].to!dstring.toLower;

    try
    {
        foreach (fieldIndex; valSplit[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsIStringDelegate(fn, fieldIndex, loweredValue);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
499 
/* Option handler for field-vs-regex tests (FieldVsRegexPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>' and is split at the first ':'. The value
 * is compiled as a regular expression, with the "i" flag when 'caseSensitive' is
 * false. One test delegate is appended to 'tests' for each field in the list, and
 * 'maxFieldIndex' is raised to the largest zero-based index seen.
 *
 * Throws: Exception if the ':' separator or the value is missing, or if the regular
 * expression fails to compile. Exceptions raised while processing the field list are
 * rethrown with the option text and expected syntax added to the message.
 */
void fieldVsRegexOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsRegexPredicate fn, string option, string optionVal,
    bool caseSensitive)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasValue = parts[2].length != 0;

    if (!(hasSeparator && hasValue))
    {
        throw new Exception(
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
                   option, optionVal, option, option));
    }

    Regex!char compiled;
    try
    {
        string flags = "i";
        if (caseSensitive) flags = "";
        compiled = regex(parts[2], flags);
    }
    catch (Exception exc)
    {
        throw new Exception(
            format("Invalid regular expression: '--%s %s'. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
                   option, optionVal, exc.msg, option, option));
    }

    try
    {
        auto zeroBasedFields = parts[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex);
        foreach (zeroBasedIndex; zeroBasedFields)
        {
            tests ~= makeFieldVsRegexDelegate(fn, zeroBasedIndex, compiled);
            if (zeroBasedIndex > maxFieldIndex) maxFieldIndex = zeroBasedIndex;
        }
    }
    catch (Exception exc)
    {
        exc.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, exc.msg, option, option);
        throw exc;
    }
}
544 
/* Option handler for field-vs-field tests (FieldVsFieldPredicate).
 *
 * 'optionVal' has the form '<field1>:<field2>', both one-based field numbers. A single
 * test delegate using the zero-based indexes is appended to 'tests', and
 * 'maxFieldIndex' is raised to cover both fields.
 *
 * Throws: Exception if the ':' separator or second field is missing, if either part
 * is not an unsigned integer, if either field number is zero, or if both field numbers
 * are the same.
 */
void fieldVsFieldOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsFieldPredicate fn, string option, string optionVal)
{
    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasSecondField = parts[2].length != 0;

    if (!(hasSeparator && hasSecondField))
    {
        throw new Exception(
            format("Invalid option: '--%s %s'. Expected: '--%s <field1>:<field2>' where fields are 1-upped integers.",
                   option, optionVal, option));
    }

    size_t field1;
    size_t field2;
    try
    {
        field1 = to!size_t(parts[0]);
        field2 = to!size_t(parts[2]);
    }
    catch (Exception exc)
    {
        throw new Exception(
            format("Invalid values in option: '--%s %s'. Expected: '--%s <field1>:<field2>' where fields are 1-upped integers.",
                   option, optionVal, option));
    }

    immutable bool bothNonZero = field1 != 0 && field2 != 0;
    if (!bothNonZero)
    {
        throw new Exception(
            format("Invalid option: '--%s %s'. Zero is not a valid field index.", option, optionVal));
    }

    if (field1 == field2)
    {
        throw new Exception(
            format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));
    }

    /* Convert from the one-based numbers on the command line to zero-based indexes. */
    immutable size_t firstIndex = field1 - 1;
    immutable size_t secondIndex = field2 - 1;

    tests ~= makeFieldVsFieldDelegate(fn, firstIndex, secondIndex);

    immutable size_t largestIndex = max(firstIndex, secondIndex);
    if (largestIndex > maxFieldIndex) maxFieldIndex = largestIndex;
}
586 
587 
/* Option handler for tests on two fields and a number (FieldFieldNumPredicate).
 *
 * 'optionVal' has the form '<field1>:<field2>:<num>', with one-based field numbers. A
 * single test delegate using the zero-based indexes is appended to 'tests', and
 * 'maxFieldIndex' is raised to cover both fields.
 *
 * Throws: Exception if either ':' separator or any of the three parts is missing, if
 * a part cannot be converted (fields to size_t, number to double), if either field
 * number is zero, or if both field numbers are the same.
 */
void fieldFieldNumOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldFieldNumPredicate fn, string option, string optionVal)
{
    /* All structural and conversion problems are reported with the same message. */
    Exception invalidValuesError()
    {
        return new Exception(
            format("Invalid values in option: '--%s %s'. Expected: '--%s <field1>:<field2>:<num>' where fields are 1-upped integers.",
                   option, optionVal, option));
    }

    /* First split: <field1> from "<field2>:<num>". */
    immutable outerSplit = findSplit(optionVal, ":");
    if (outerSplit[1].length == 0 || outerSplit[2].length == 0) throw invalidValuesError();

    /* Second split: <field2> from <num>. */
    immutable innerSplit = findSplit(outerSplit[2], ":");
    if (innerSplit[1].length == 0 || innerSplit[2].length == 0) throw invalidValuesError();

    size_t field1;
    size_t field2;
    double value;
    bool converted = true;
    try
    {
        field1 = to!size_t(outerSplit[0]);
        field2 = to!size_t(innerSplit[0]);
        value = to!double(innerSplit[2]);
    }
    catch (Exception exc)
    {
        converted = false;
    }
    if (!converted) throw invalidValuesError();

    immutable bool bothNonZero = field1 != 0 && field2 != 0;
    if (!bothNonZero)
    {
        throw new Exception(
            format("Invalid option: '--%s %s'. Zero is not a valid field index.", option, optionVal));
    }
    if (field1 == field2)
    {
        throw new Exception(
            format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));
    }

    /* Convert from the one-based numbers on the command line to zero-based indexes. */
    immutable size_t firstIndex = field1 - 1;
    immutable size_t secondIndex = field2 - 1;

    tests ~= makeFieldFieldNumDelegate(fn, firstIndex, secondIndex, value);

    immutable size_t largestIndex = max(firstIndex, secondIndex);
    if (largestIndex > maxFieldIndex) maxFieldIndex = largestIndex;
}
639 
/** Holds the outcome of command line option processing for tsv-filter.
 *
 * The processArgs method runs the command line parsing. It fills in the option fields
 * below and appends one test to the tests array for each test option it encounters.
 */
struct TsvFilterOptions
{
    string programName;
    FieldsPredicate[] tests;         // Built up by the test option handlers
    size_t maxFieldIndex;            // Updated by the test option handlers; sizes the per-line fields array
    bool hasHeader = false;          // --H|header
    bool invert = false;             // --v|invert
    bool disjunct = false;           // --or
    char delim = '\t';               // --d|delimiter
    bool helpVerbose = false;        // --help-verbose
    bool helpOptions = false;        // --help-options
    bool versionWanted = false;      // --V|version

    /* Parses the command line and returns a (bool, int) tuple.
     *
     *   (true, 0)         - Arguments were processed successfully. The tests array has
     *                       been set up and the caller should continue.
     *   (false, exitCode) - The caller should stop. The exit code is 0 when help or
     *                       version output was requested and printed, 1 when an error
     *                       occurred while processing the arguments.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;
        import tsv_utils.common.getopt_inorder;

        if (cmdArgs.length == 0) programName = "Unknown_program_name";
        else programName = cmdArgs[0].stripExtension.baseName;

        /* Option handlers, one per test option. Each has the (option, value) signature
         * getopt expects for handlers. A handler binds its option to a predicate and
         * forwards to the shared handler for that family of tests, which keeps knowledge
         * of the specific option text out of the option processing code.
         */

        /* Tests on a single field with no comparison value. */
        void handlerFldEmpty(string opt, string val)    { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldEmpty,    opt, val); }
        void handlerFldNotEmpty(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldNotEmpty, opt, val); }
        void handlerFldBlank(string opt, string val)    { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldBlank,    opt, val); }
        void handlerFldNotBlank(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldNotBlank, opt, val); }

        void handlerFldIsNumeric(string opt, string val)  { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsNumeric,  opt, val); }
        void handlerFldIsFinite(string opt, string val)   { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsFinite,   opt, val); }
        void handlerFldIsNaN(string opt, string val)      { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsNaN,      opt, val); }
        void handlerFldIsInfinity(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsInfinity, opt, val); }

        /* Field vs number. */
        void handlerNumLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numLE, opt, val); }
        void handlerNumLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numLT, opt, val); }
        void handlerNumGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numGE, opt, val); }
        void handlerNumGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numGT, opt, val); }
        void handlerNumEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numEQ, opt, val); }
        void handlerNumNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numNE, opt, val); }

        /* Field vs string. */
        void handlerStrLE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strLE, opt, val); }
        void handlerStrLT(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strLT, opt, val); }
        void handlerStrGE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strGE, opt, val); }
        void handlerStrGT(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strGT, opt, val); }
        void handlerStrEQ(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strEQ, opt, val); }
        void handlerStrNE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strNE, opt, val); }

        void handlerStrInFld(string opt, string val)    { fieldVsStringOptionHandler(tests, maxFieldIndex, &strInFld,    opt, val); }
        void handlerStrNotInFld(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strNotInFld, opt, val); }

        /* Field vs string, options with the 'i' (case-insensitive) prefix. */
        void handlerIStrEQ(string opt, string val)       { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrEQ,       opt, val); }
        void handlerIStrNE(string opt, string val)       { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrNE,       opt, val); }
        void handlerIStrInFld(string opt, string val)    { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrInFld,    opt, val); }
        void handlerIStrNotInFld(string opt, string val) { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrNotInFld, opt, val); }

        /* Field vs regular expression. The trailing flag is true for the plain regex
         * options and false for the 'iregex' (case-insensitive) options.
         */
        void handlerRegexMatch(string opt, string val)     { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexMatch,    opt, val, true); }
        void handlerRegexNotMatch(string opt, string val)  { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexNotMatch, opt, val, true); }
        void handlerIRegexMatch(string opt, string val)    { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexMatch,    opt, val, false); }
        void handlerIRegexNotMatch(string opt, string val) { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexNotMatch, opt, val, false); }

        /* Field length in characters vs number. */
        void handlerCharLenLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenLE, opt, val); }
        void handlerCharLenLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenLT, opt, val); }
        void handlerCharLenGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenGE, opt, val); }
        void handlerCharLenGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenGT, opt, val); }
        void handlerCharLenEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenEQ, opt, val); }
        void handlerCharLenNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenNE, opt, val); }

        /* Field length in bytes vs number. */
        void handlerByteLenLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenLE, opt, val); }
        void handlerByteLenLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenLT, opt, val); }
        void handlerByteLenGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenGE, opt, val); }
        void handlerByteLenGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenGT, opt, val); }
        void handlerByteLenEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenEQ, opt, val); }
        void handlerByteLenNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenNE, opt, val); }

        /* Field vs field. */
        void handlerFFLE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffLE, opt, val); }
        void handlerFFLT(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffLT, opt, val); }
        void handlerFFGE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffGE, opt, val); }
        void handlerFFGT(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffGT, opt, val); }
        void handlerFFEQ(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffEQ, opt, val); }
        void handlerFFNE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffNE, opt, val); }

        void handlerFFStrEQ(string opt, string val)  { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffStrEQ,  opt, val); }
        void handlerFFStrNE(string opt, string val)  { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffStrNE,  opt, val); }
        void handlerFFIStrEQ(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffIStrEQ, opt, val); }
        void handlerFFIStrNE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffIStrNE, opt, val); }

        /* Difference between two fields vs number. */
        void handlerFFAbsDiffLE(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffAbsDiffLE, opt, val); }
        void handlerFFAbsDiffGT(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffAbsDiffGT, opt, val); }
        void handlerFFRelDiffLE(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffRelDiffLE, opt, val); }
        void handlerFFRelDiffGT(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffRelDiffGT, opt, val); }

        try
        {
            arraySep = ",";    // Values within a single command line option are comma separated
            auto getoptResult = getoptInorder(
                cmdArgs,
                "help-verbose",    "     Print full help.", &helpVerbose,
                "help-options",    "     Print the options list by itself.", &helpOptions,
                std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                "H|header",        "     Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "or",              "     Evaluate tests as an OR rather than an AND.", &disjunct,
                std.getopt.config.caseSensitive,
                "v|invert",        "     Invert the filter, printing lines that do not match.", &invert,
                std.getopt.config.caseInsensitive,
                "d|delimiter",     "CHR  Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,

                "empty",           "<field-list>       True if FIELD is empty.", &handlerFldEmpty,
                "not-empty",       "<field-list>       True if FIELD is not empty.", &handlerFldNotEmpty,
                "blank",           "<field-list>       True if FIELD is empty or all whitespace.", &handlerFldBlank,
                "not-blank",       "<field-list>       True if FIELD contains a non-whitespace character.", &handlerFldNotBlank,

                "is-numeric",      "<field-list>       True if FIELD is interpretable as a number.", &handlerFldIsNumeric,
                "is-finite",       "<field-list>       True if FIELD is interpretable as a number and is not NaN or infinity.", &handlerFldIsFinite,
                "is-nan",          "<field-list>       True if FIELD is NaN.", &handlerFldIsNaN,
                "is-infinity",     "<field-list>       True if FIELD is infinity.", &handlerFldIsInfinity,

                "le",              "<field-list>:NUM   FIELD <= NUM (numeric).", &handlerNumLE,
                "lt",              "<field-list>:NUM   FIELD <  NUM (numeric).", &handlerNumLT,
                "ge",              "<field-list>:NUM   FIELD >= NUM (numeric).", &handlerNumGE,
                "gt",              "<field-list>:NUM   FIELD >  NUM (numeric).", &handlerNumGT,
                "eq",              "<field-list>:NUM   FIELD == NUM (numeric).", &handlerNumEQ,
                "ne",              "<field-list>:NUM   FIELD != NUM (numeric).", &handlerNumNE,

                "str-le",          "<field-list>:STR   FIELD <= STR (string).", &handlerStrLE,
                "str-lt",          "<field-list>:STR   FIELD <  STR (string).", &handlerStrLT,
                "str-ge",          "<field-list>:STR   FIELD >= STR (string).", &handlerStrGE,
                "str-gt",          "<field-list>:STR   FIELD >  STR (string).", &handlerStrGT,
                "str-eq",          "<field-list>:STR   FIELD == STR (string).", &handlerStrEQ,
                "istr-eq",         "<field-list>:STR   FIELD == STR (string, case-insensitive).", &handlerIStrEQ,
                "str-ne",          "<field-list>:STR   FIELD != STR (string).", &handlerStrNE,
                "istr-ne",         "<field-list>:STR   FIELD != STR (string, case-insensitive).", &handlerIStrNE,
                "str-in-fld",      "<field-list>:STR   FIELD contains STR (substring search).", &handlerStrInFld,
                "istr-in-fld",     "<field-list>:STR   FIELD contains STR (substring search, case-insensitive).", &handlerIStrInFld,
                "str-not-in-fld",  "<field-list>:STR   FIELD does not contain STR (substring search).", &handlerStrNotInFld,
                "istr-not-in-fld", "<field-list>:STR   FIELD does not contain STR (substring search, case-insensitive).", &handlerIStrNotInFld,

                "regex",           "<field-list>:REGEX   FIELD matches regular expression.", &handlerRegexMatch,
                "iregex",          "<field-list>:REGEX   FIELD matches regular expression, case-insensitive.", &handlerIRegexMatch,
                "not-regex",       "<field-list>:REGEX   FIELD does not match regular expression.", &handlerRegexNotMatch,
                "not-iregex",      "<field-list>:REGEX   FIELD does not match regular expression, case-insensitive.", &handlerIRegexNotMatch,

                "char-len-le",     "<field-list>:NUM   character-length(FIELD) <= NUM.", &handlerCharLenLE,
                "char-len-lt",     "<field-list>:NUM   character-length(FIELD) < NUM.", &handlerCharLenLT,
                "char-len-ge",     "<field-list>:NUM   character-length(FIELD) >= NUM.", &handlerCharLenGE,
                "char-len-gt",     "<field-list>:NUM   character-length(FIELD) > NUM.", &handlerCharLenGT,
                "char-len-eq",     "<field-list>:NUM   character-length(FIELD) == NUM.", &handlerCharLenEQ,
                "char-len-ne",     "<field-list>:NUM   character-length(FIELD) != NUM.", &handlerCharLenNE,

                "byte-len-le",     "<field-list>:NUM   byte-length(FIELD) <= NUM.", &handlerByteLenLE,
                "byte-len-lt",     "<field-list>:NUM   byte-length(FIELD) < NUM.", &handlerByteLenLT,
                "byte-len-ge",     "<field-list>:NUM   byte-length(FIELD) >= NUM.", &handlerByteLenGE,
                "byte-len-gt",     "<field-list>:NUM   byte-length(FIELD) > NUM.", &handlerByteLenGT,
                "byte-len-eq",     "<field-list>:NUM   byte-length(FIELD) == NUM.", &handlerByteLenEQ,
                "byte-len-ne",     "<field-list>:NUM   byte-length(FIELD) != NUM.", &handlerByteLenNE,

                "ff-le",           "FIELD1:FIELD2   FIELD1 <= FIELD2 (numeric).", &handlerFFLE,
                "ff-lt",           "FIELD1:FIELD2   FIELD1 <  FIELD2 (numeric).", &handlerFFLT,
                "ff-ge",           "FIELD1:FIELD2   FIELD1 >= FIELD2 (numeric).", &handlerFFGE,
                "ff-gt",           "FIELD1:FIELD2   FIELD1 >  FIELD2 (numeric).", &handlerFFGT,
                "ff-eq",           "FIELD1:FIELD2   FIELD1 == FIELD2 (numeric).", &handlerFFEQ,
                "ff-ne",           "FIELD1:FIELD2   FIELD1 != FIELD2 (numeric).", &handlerFFNE,
                "ff-str-eq",       "FIELD1:FIELD2   FIELD1 == FIELD2 (string).", &handlerFFStrEQ,
                "ff-istr-eq",      "FIELD1:FIELD2   FIELD1 == FIELD2 (string, case-insensitive).", &handlerFFIStrEQ,
                "ff-str-ne",       "FIELD1:FIELD2   FIELD1 != FIELD2 (string).", &handlerFFStrNE,
                "ff-istr-ne",      "FIELD1:FIELD2   FIELD1 != FIELD2 (string, case-insensitive).", &handlerFFIStrNE,

                "ff-absdiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) <= NUM", &handlerFFAbsDiffLE,
                "ff-absdiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2)  > NUM", &handlerFFAbsDiffGT,
                "ff-reldiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2)) <= NUM", &handlerFFRelDiffLE,
                "ff-reldiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2))  > NUM", &handlerFFRelDiffGT,
                );

            /* Informational requests, checked in priority order. Each one prints its
             * output and ends processing with a zero exit code. The regular help text is
             * long and already summarizes the options, so it is printed without the
             * generated options list.
             */
            if (getoptResult.helpWanted)
            {
                stdout.write(helpText);
                return tuple(false, 0);
            }

            if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, getoptResult.options);
                return tuple(false, 0);
            }

            if (helpOptions)
            {
                defaultGetoptPrinter(helpTextOptions, getoptResult.options);
                return tuple(false, 0);
            }

            if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-filter"));
                return tuple(false, 0);
            }
        }
        catch (Exception exc)
        {
            /* Report the problem on stderr and signal a non-zero exit code. */
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }

        /* Normal case: arguments are valid and the tests are ready to run. */
        return tuple(true, 0);
    }
}
859 
/** Reads the input files line by line, runs the configured tests against each line, and
 * writes the lines that pass to standard output.
 *
 * Params:
 *   cmdopt     = Options and tests established by command line processing.
 *   inputFiles = Files to read. Standard input is read if the list is empty or if a
 *                file name is "-".
 *
 * Throws: Exception if a line has fewer fields than the tests need, or if an exception
 *   is raised while testing or writing a line. The message includes the file and line
 *   number.
 */
void tsvFilter(const TsvFilterOptions cmdopt, const string[] inputFiles)
{
    import std.algorithm : all, any, splitter;
    import std.range;
    import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, throwIfWindowsNewlineOnUnix;

    /* Output is written via a BufferedOutputRange, which helps performance on narrow
     * files where most lines are written. Output should still be responsive when matches
     * are rare. The counter below tracks input lines seen since the last flush. It starts
     * above the limit so the first matched line is flushed, as is any match that follows
     * a long run of non-matching lines.
     */
    enum maxInputLinesWithoutBufferFlush = 1024;
    size_t linesSinceFlush = maxInputLinesWithoutBufferFlush + 1;

    auto outputBuffer = BufferedOutputRange!(typeof(stdout))(stdout);

    /* Only fields up to the highest index used by the tests are captured per line. */
    immutable long lastNeededIndex = cast(long) cmdopt.maxFieldIndex;
    auto lineFields = new char[][](cmdopt.maxFieldIndex + 1);
    bool headerWritten = false;

    foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"])
    {
        immutable bool isStdin = (filename == "-");
        const sourceName = isStdin ? "Standard Input" : filename;   // Used in error messages
        auto inputStream = isStdin ? stdin : filename.File();

        foreach (lineNum, line; inputStream.bufferedByLine.enumerate(1))
        {
            if (lineNum == 1)
            {
                throwIfWindowsNewlineOnUnix(line, filename, lineNum);

                if (cmdopt.hasHeader)
                {
                    /* Header line. Written for the first file only, skipped after that. */
                    if (!headerWritten)
                    {
                        outputBuffer.appendln(line);
                        headerWritten = true;
                    }
                    continue;
                }
            }

            /* Capture the leading fields of the line, stopping once the last field
             * needed by the tests has been stored.
             */
            int lastFilledIndex = -1;
            foreach (fieldValue; line.splitter(cmdopt.delim))
            {
                if (lastFilledIndex == lastNeededIndex) break;
                lineFields[++lastFilledIndex] = fieldValue;
            }

            if (lastFilledIndex == -1)
            {
                /* Work-around for splitter producing no elements on an empty line.
                 *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
                 *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
                 * The line is an empty string, so use it as the first field.
                 */
                assert(line.length == 0);
                lineFields[0] = line;
                lastFilledIndex = 0;
            }

            if (lastFilledIndex < lastNeededIndex)
            {
                throw new Exception(
                    format("Not enough fields in line. File: %s, Line: %s",
                           sourceName, lineNum));
            }

            /* Evaluate the tests. A test throws if a field cannot be converted to the
             * type it expects. Any exception is re-thrown with file and line context.
             */
            try
            {
                linesSinceFlush++;

                bool matched;
                if (cmdopt.disjunct) matched = cmdopt.tests.any!(test => test(lineFields));
                else matched = cmdopt.tests.all!(test => test(lineFields));

                /* With --invert a line is written exactly when the tests did not match. */
                if (matched != cmdopt.invert)
                {
                    bool flushed = outputBuffer.appendln(line);
                    if (!flushed && linesSinceFlush > maxInputLinesWithoutBufferFlush)
                    {
                        outputBuffer.flush;
                        flushed = true;
                    }
                    if (flushed) linesSinceFlush = 0;
                }
            }
            catch (Exception exc)
            {
                immutable string headerHint =
                    (lineNum == 1) ? "\n  Is this a header line? Use --header to skip." : "";

                throw new Exception(
                    format("Could not process line or field: %s\n  File: %s Line: %s%s",
                           exc.msg, sourceName, lineNum, headerHint));
            }
        }
    }
}