1 /**
2 Command line tool that filters TSV files.
3 
4 This tool filters tab-delimited files based on numeric or string comparisons
5 against specific fields. See the helpText string for details.
6 
7 Copyright (c) 2015-2020, eBay Inc.
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.tsv_filter;
13 
14 import std.algorithm : canFind, equal, findSplit, max, min;
15 import std.conv : to;
16 import std.exception : enforce;
17 import std.format : format;
18 import std.math : abs, isFinite, isInfinity, isNaN;
19 import std.range;
20 import std.regex;
21 import std.stdio;
22 import std.string : isNumeric;
23 import std.typecons;
24 import std.uni: asLowerCase, toLower, byGrapheme;
25 
26 /* The program has two main parts, command line arg processing and processing the input
27  * files. Much of the work is in command line arg processing. This sets up the tests run
28  * against each input line. The tests are an array of delegates (closures) run against the
29  * fields in the line. The tests are based on command line arguments, of which there is
30  * a lengthy set, one for each test.
31  */
32 
/* Run-time option defaults exported under the C symbol name 'rt_options'. The single
 * entry, "gcopt=cleanup:none", is a garbage collector option string.
 * NOTE(review): per the druntime GC configuration docs this should skip the GC's
 * collection/finalization pass at program exit - presumably to speed up termination;
 * confirm. The declaration is only compiled when __VERSION__ is 2085 or later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
34 
/** Program entry point.
 *
 * Processes the command line arguments into a TsvFilterOptions, then hands the options
 * to tsvFilter. If argument processing says not to continue, the exit code it supplies
 * is returned directly. Exceptions thrown by tsvFilter are reported on stderr and turn
 * into exit code 1; otherwise the exit code is 0.
 */
int main(string[] cmdArgs)
{
    /* Merge coverage reports across runs when built by DMD in code coverage mode. */
    version(D_Coverage)
    {
        version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }
    }

    TsvFilterOptions options;
    const argsResult = options.processArgs(cmdArgs);

    /* argsResult[0]: continue execution?  argsResult[1]: exit code to use if not. */
    if (!argsResult[0])
    {
        return argsResult[1];
    }

    /* Reset LDC profile counters so only the filtering work is profiled. */
    version(LDC_Profile)
    {
        import ldc.profile : resetAll;
        resetAll();
    }

    int exitCode = 0;
    try
    {
        tsvFilter(options);
    }
    catch (Exception e)
    {
        stderr.writefln("Error [%s]: %s", options.programName, e.msg);
        exitCode = 1;
    }

    return exitCode;
}
63 
/** Brief help text: synopsis, global options, and one syntax/example pair per operator
 * family.
 *
 * The final example uses '--ff-absdiff-le' so that it matches an operator named in the
 * Syntax lines above it, and describes the fields actually named by the '1:3' argument.
 */
immutable helpText = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter tab-delimited files for matching lines via comparison tests against
individual fields. Use '--help-verbose' for a more detailed description.

Global options:
  --help-verbose      Print full help.
  --help-options      Print the options list by itself.
  --V|version         Print version information and exit.
  --H|header          Treat the first line of each file as a header.
  --or                Evaluate tests as an OR rather than an AND clause.
  --v|invert          Invert the filter, printing lines that do not match.
  --d|delimiter CHR   Field delimiter. Default: TAB.

Operators:
* Test if a field is empty (no characters) or blank (empty or whitespace only).
  Syntax:  --empty|not-empty|blank|not-blank  FIELD
  Example: --empty 5          // True if field 5 is empty

* Test if a field is numeric, finite, NaN, or infinity
  Syntax:  --is-numeric|is-finite|is-nan|is-infinity FIELD
  Example: --is-numeric 5 --gt 5:100  // Ensure field 5 is numeric before --gt test.

* Compare a field to a number (integer or float)
  Syntax:  --eq|ne|lt|le|gt|ge  FIELD:NUM
  Example: --lt 5:1000 --gt 2:0.5  // True if (field 5 < 1000) and (field 2 > 0.5)

* Compare a field to a string
  Syntax:  --str-eq|str-ne  FIELD:STR
  Example: --str-eq 3:abc        // True if field 3 is "abc"

* Test if a field contains a string (substring search)
  Syntax:  --str-in-fld|str-not-in-fld|istr-in-fld|istr-not-in-fld  FIELD:STR
  Example: --str-in-fld 1:hello  // True if field 1 contains "hello"

* Test if a field matches a regular expression.
  Syntax:  --regex|iregex|not-regex|not-iregex  FIELD:REGEX
  Example: --regex '3:ab*c'      // True if field 3 contains "ac", "abc", "abbc", etc.

* Test a field's character or byte length
  Syntax:  --char-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
           --byte-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
  Example: --char-len-lt 2:10    // True if field 2 is less than 10 characters long.
           --byte-len-gt 2:10    // True if field 2 is greater than 10 bytes long.

* Field to field comparisons - Similar to field vs literal comparisons, but field vs field.
  Syntax:  --ff-eq|ff-ne|ff-lt|ff-le|ff-gt|ff-ge  FIELD1:FIELD2
           --ff-str-eq|ff-str-ne|ff-istr-eq|ff-istr-ne  FIELD1:FIELD2
  Example: --ff-eq 2:4           // True if fields 2 and 4 are numerically equivalent
           --ff-str-eq 2:4       // True if fields 2 and 4 are the same strings

* Field to field difference comparisons - Absolute and relative difference
  Syntax:  --ff-absdiff-le|ff-absdiff-gt FIELD1:FIELD2:NUM
           --ff-reldiff-le|ff-reldiff-gt FIELD1:FIELD2:NUM
  Example: --ff-absdiff-le 1:3:0.25   // True if abs(field 1 - field 3) <= 0.25

EOS";
122 
/* Verbose help text: synopsis, a description of the '--op field:value' test syntax with
 * examples, the list of available test families, and behavioral details. The text ends
 * with an "Options:" heading.
 * NOTE(review): presumably the generated option list is printed right after this text;
 * confirm against the code handling '--help-verbose'.
 */
immutable helpTextVerbose = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter lines of tab-delimited files via comparison tests against fields. Multiple
tests can be specified, by default they are evaluated as AND clause. Lines
satisfying the tests are written to standard output.

Typical test syntax is '--op field:value', where 'op' is an operator, 'field' is a
1-based field index, and 'value' is the comparison basis. For example, '--lt 3:500'
tests if field 3 is less than 500. A more complete example:

  tsv-filter --header --gt 1:50 --lt 1:100 --le 2:1000 data.tsv

This outputs all lines from file data.tsv where field 1 is greater than 50 and less
than 100, and field 2 is less than or equal to 1000. The header is also output.

Field lists can be used to specify multiple fields at once. For example:

  tsv-filter --not-blank 1-10 --str-ne 1,2,5:'--' data.tsv

tests that fields 1-10 are not blank and fields 1,2,5 are not "--".

Tests available include:
  * Test if a field is empty (no characters) or blank (empty or whitespace only).
  * Test if a field is interpretable as a number, a finite number, NaN, or Infinity.
  * Compare a field to a number - Numeric equality and relational tests.
  * Compare a field to a string - String equality and relational tests.
  * Test if a field matches a regular expression. Case sensitive or insensitive.
  * Test if a field contains a string. Sub-string search, case sensitive or insensitive.
  * Test a field's character or byte length.
  * Field to field comparisons - Similar to the other tests, except comparing
    one field to another in the same line.

Details:
  * The run is aborted if there are not enough fields in an input line.
  * Numeric tests will fail and abort the run if a field cannot be interpreted as a
    number. This includes fields with no text. To avoid this use '--is-numeric' or
    '--is-finite' prior to the numeric test. For example, '--is-numeric 5 --gt 5:100'
    ensures field 5 is numeric before running the --gt test.
  * Regular expression syntax is defined by the D programming language. They follow
    common conventions (perl, python, etc.). Most common forms work as expected.

Options:
EOS";
167 
/* Options-only help text: just the synopsis followed by an "Options:" heading.
 * NOTE(review): presumably the generated option list is printed right after this text;
 * confirm against the code handling '--help-options'.
 */
immutable helpTextOptions = q"EOS
Synopsis: tsv-filter [options] [file...]

Options:
EOS";
173 
174 /* The next blocks of code define the structure of the boolean tests run against input lines.
175  * This includes function and delegate (closure) signatures, creation mechanisms, option
 * handlers, etc. Command line arg processing uses these to build the test structure.
177 */
178 
/* FieldsPredicate delegate signature - Each input line is run against a set of boolean
 * tests. Each test is a 'FieldsPredicate'. A FieldsPredicate is a delegate (closure)
 * containing all info about the test except the field values of the line being tested.
 * These delegates are created as part of command line arg processing. The wrapped data
 * includes operation, field indexes, literal values, etc. At run-time the delegate is
 * passed one argument, the split input line.
 */
alias FieldsPredicate = bool delegate(const char[][] fields);

/* FieldsPredicate function signatures - These aliases represent the different function
 * signatures used in FieldsPredicate delegates. Each alias has a corresponding 'make'
 * function. The 'make' function takes a real predicate function and closure args and
 * returns a FieldsPredicate delegate. Predicate types are:
 *
 * - FieldUnaryPredicate - Test based on a single field. (e.g. --empty 4)
 * - FieldVsNumberPredicate - Test based on a field index (used to get the field value)
 *   and a fixed numeric value. For example, field 2 less than 100 (--lt 2:100).
 * - FieldVsStringPredicate - Test based on a field and a string. (e.g. --str-eq 2:abc)
 * - FieldVsIStringPredicate - Case-insensitive test based on a field and a string.
 *   (e.g. --istr-eq 2:abc)
 * - FieldVsRegexPredicate - Test based on a field and a regex. (e.g. --regex '2:ab*c')
 * - FieldVsFieldPredicate - Test based on two fields. (e.g. --ff-le 2:4).
 * - FieldFieldNumPredicate - Test based on two fields and a fixed numeric value.
 *   (e.g. --ff-absdiff-le 1:3:0.25)
 *
 * An actual FieldsPredicate takes the fields from the line and the closure args and
 * runs the test. For example, a function testing if a field is less than a specific
 * value would pull the specified field from the fields array, convert the string to
 * a number, then run the less-than test.
 */
alias FieldUnaryPredicate    = bool function(const char[][] fields, size_t index);
alias FieldVsNumberPredicate = bool function(const char[][] fields, size_t index, double value);
alias FieldVsStringPredicate = bool function(const char[][] fields, size_t index, string value);
alias FieldVsIStringPredicate = bool function(const char[][] fields, size_t index, dstring value);
alias FieldVsRegexPredicate  = bool function(const char[][] fields, size_t index, Regex!char value);
alias FieldVsFieldPredicate  = bool function(const char[][] fields, size_t index1, size_t index2);
alias FieldFieldNumPredicate  = bool function(const char[][] fields, size_t index1, size_t index2, double value);
214 
/* Wraps a single-field predicate and its (zero-based) field index in a closure that
 * only needs the split input line.
 */
FieldsPredicate makeFieldUnaryDelegate(FieldUnaryPredicate fn, size_t index)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index);
    };
}
219 
/* Wraps a field-vs-number predicate, its field index, and the comparison number in a
 * closure that only needs the split input line.
 */
FieldsPredicate makeFieldVsNumberDelegate(FieldVsNumberPredicate fn, size_t index, double value)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index, value);
    };
}
224 
/* Wraps a field-vs-string predicate, its field index, and the comparison string in a
 * closure that only needs the split input line.
 */
FieldsPredicate makeFieldVsStringDelegate(FieldVsStringPredicate fn, size_t index, string value)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index, value);
    };
}
229 
/* Wraps a case-insensitive field-vs-string predicate, its field index, and the
 * comparison dstring in a closure that only needs the split input line.
 */
FieldsPredicate makeFieldVsIStringDelegate(FieldVsIStringPredicate fn, size_t index, dstring value)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index, value);
    };
}
234 
/* Wraps a field-vs-regex predicate, its field index, and the compiled regex in a
 * closure that only needs the split input line.
 */
FieldsPredicate makeFieldVsRegexDelegate(FieldVsRegexPredicate fn, size_t index, Regex!char value)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index, value);
    };
}
239 
/* Wraps a field-vs-field predicate and its two field indexes in a closure that only
 * needs the split input line.
 */
FieldsPredicate makeFieldVsFieldDelegate(FieldVsFieldPredicate fn, size_t index1, size_t index2)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index1, index2);
    };
}
244 
/* Wraps a two-field-plus-number predicate, its two field indexes, and the number in a
 * closure that only needs the split input line.
 */
FieldsPredicate makeFieldFieldNumDelegate(FieldFieldNumPredicate fn, size_t index1, size_t index2, double value)
{
    return delegate bool(const char[][] fields)
    {
        return fn(fields, index1, index2, value);
    };
}
249 
250 /* Predicate functions - These are the actual functions used in a FieldsPredicate. They
251  * are a direct reflection of the operators available via command line args. Each matches
252  * one of the FieldsPredicate function aliases defined above.
253  */
/* Unary field tests. "Empty" means the field has no characters; "blank" means the
 * whole field matches the regex ^\s*$ (no characters, or whitespace only).
 */
bool fldEmpty(const char[][] fields, size_t index)
{
    return fields[index].length == 0;
}

bool fldNotEmpty(const char[][] fields, size_t index)
{
    return fields[index].length != 0;
}

bool fldBlank(const char[][] fields, size_t index)
{
    auto blankMatch = matchFirst(fields[index], ctRegex!`^\s*$`);
    return !blankMatch.empty;
}

bool fldNotBlank(const char[][] fields, size_t index)
{
    auto blankMatch = matchFirst(fields[index], ctRegex!`^\s*$`);
    return blankMatch.empty;
}
258 
/* Numeric classification tests.
 *
 * fldIsNumeric reports whether std.string.isNumeric accepts the field text.
 *
 * fldIsFinite, fldIsNaN, and fldIsInfinity require the field to pass isNumeric AND to
 * convert to a double. isNumeric accepts some forms that to!double rejects (for example
 * the suffixed literals "123L" or "nani"). For such fields these three tests return
 * false rather than letting the conversion exception escape, so they can be used as
 * guards ahead of numeric comparison tests without aborting the run.
 */

/* Converts a field to a double. Returns true and sets 'value' on success. Returns false
 * if the field is not accepted by isNumeric or cannot be converted by to!double.
 */
private bool tryFieldToDouble(const char[] field, out double value)
{
    import std.conv : ConvException, to;

    if (!field.isNumeric) return false;

    try
    {
        value = field.to!double;
    }
    catch (ConvException)
    {
        return false;
    }

    return true;
}

bool fldIsNumeric(const char[][] fields, size_t index)
{
    return fields[index].isNumeric;
}

bool fldIsFinite(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isFinite;
}

bool fldIsNaN(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isNaN;
}

bool fldIsInfinity(const char[][] fields, size_t index)
{
    double value;
    return tryFieldToDouble(fields[index], value) && value.isInfinity;
}
263 
/* Field vs number comparisons. The field text is converted to a double and compared to
 * 'val'. The conversion throws if the field text is not convertible.
 */
bool numLE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue <= val;
}

bool numLT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue < val;
}

bool numGE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue >= val;
}

bool numGT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue > val;
}

bool numEQ(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue == val;
}

bool numNE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = to!double(fields[index]);
    return fieldValue != val;
}
270 
/* Field vs string tests. Relational and equality tests use D's built-in array
 * comparison operators; the 'in field' tests are substring searches.
 */
bool strLE(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field <= val;
}

bool strLT(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field < val;
}

bool strGE(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field >= val;
}

bool strGT(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field > val;
}

bool strEQ(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field == val;
}

bool strNE(const char[][] fields, size_t index, string val)
{
    const(char)[] field = fields[index];
    return field != val;
}

bool strInFld(const char[][] fields, size_t index, string val)
{
    return canFind(fields[index], val);
}

bool strNotInFld(const char[][] fields, size_t index, string val)
{
    return !canFind(fields[index], val);
}
279 
/* Case-insensitive field vs string tests. The field is lower-cased on the fly and
 * compared with 'val' as given. Note: 'val' is expected to be lower-cased already;
 * fieldVsIStringOptionHandler does this for values from the command line.
 */
bool istrEQ(const char[][] fields, size_t index, dstring val)
{
    return equal(asLowerCase(fields[index]), val);
}

bool istrNE(const char[][] fields, size_t index, dstring val)
{
    return !equal(asLowerCase(fields[index]), val);
}

bool istrInFld(const char[][] fields, size_t index, dstring val)
{
    return canFind(asLowerCase(fields[index]), val);
}

bool istrNotInFld(const char[][] fields, size_t index, dstring val)
{
    return !canFind(asLowerCase(fields[index]), val);
}
286 
/* Regex tests. The same two predicates serve the case-sensitive and case-insensitive
 * operators, because case handling is part of the compiled regex passed in.
 */
bool regexMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto firstMatch = matchFirst(fields[index], val);
    return !firstMatch.empty;
}

bool regexNotMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto firstMatch = matchFirst(fields[index], val);
    return firstMatch.empty;
}
292 
/* Character length tests. The length is the number of graphemes in the field, as
 * counted by walking std.uni.byGrapheme, compared against 'val'.
 */
bool charLenLE(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount <= val;
}

bool charLenLT(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount < val;
}

bool charLenGE(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount >= val;
}

bool charLenGT(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount > val;
}

bool charLenEQ(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount == val;
}

bool charLenNE(const char[][] fields, size_t index, double val)
{
    immutable size_t graphemeCount = walkLength(byGrapheme(fields[index]));
    return graphemeCount != val;
}
299 
/* Byte length tests. The length is the field's char array length (UTF-8 code units),
 * compared against 'val'.
 */
bool byteLenLE(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount <= val;
}

bool byteLenLT(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount < val;
}

bool byteLenGE(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount >= val;
}

bool byteLenGT(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount > val;
}

bool byteLenEQ(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount == val;
}

bool byteLenNE(const char[][] fields, size_t index, double val)
{
    immutable size_t byteCount = fields[index].length;
    return byteCount != val;
}
306 
/* Field vs field tests. The numeric forms convert both fields to doubles before
 * comparing (conversion throws if a field is not convertible). The string forms compare
 * the field text directly, and the case-insensitive forms compare lower-cased text.
 */
bool ffLE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs <= rhs;
}

bool ffLT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs < rhs;
}

bool ffGE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs >= rhs;
}

bool ffGT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs > rhs;
}

bool ffEQ(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs == rhs;
}

bool ffNE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return lhs != rhs;
}

bool ffStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    const(char)[] lhs = fields[index1];
    const(char)[] rhs = fields[index2];
    return lhs == rhs;
}

bool ffStrNE(const char[][] fields, size_t index1, size_t index2)
{
    const(char)[] lhs = fields[index1];
    const(char)[] rhs = fields[index2];
    return lhs != rhs;
}

bool ffIStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    auto lhsLowered = fields[index1].asLowerCase;
    auto rhsLowered = fields[index2].asLowerCase;
    return lhsLowered.equal(rhsLowered);
}

bool ffIStrNE(const char[][] fields, size_t index1, size_t index2)
{
    auto lhsLowered = fields[index1].asLowerCase;
    auto rhsLowered = fields[index2].asLowerCase;
    return !lhsLowered.equal(rhsLowered);
}
323 
/* Absolute difference: |v1 - v2|. */
auto AbsDiff(double v1, double v2)
{
    return abs(v1 - v2);
}

/* Relative difference: |v1 - v2| divided by the smaller of |v1| and |v2|. */
auto RelDiff(double v1, double v2)
{
    immutable difference = abs(v1 - v2);
    immutable smallerMagnitude = min(abs(v1), abs(v2));
    return difference / smallerMagnitude;
}

/* Field vs field difference tests. Both fields are converted to doubles (conversion
 * throws if a field is not convertible), then their absolute or relative difference is
 * compared against 'value'.
 */
bool ffAbsDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return AbsDiff(lhs, rhs) <= value;
}

bool ffAbsDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return AbsDiff(lhs, rhs) > value;
}

bool ffRelDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return RelDiff(lhs, rhs) <= value;
}

bool ffRelDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = to!double(fields[index1]);
    immutable double rhs = to!double(fields[index2]);
    return RelDiff(lhs, rhs) > value;
}
343 
344 /* Command line option handlers - There is a command line option handler for each
345  * predicate type. That is, one each for FieldUnaryPredicate, FieldVsNumberPredicate,
346  * etc. Option handlers are passed the tests array, the predicate function, and the
347  * command line option arguments. A FieldsPredicate delegate is created and appended to
348  * the tests array. An exception is thrown if errors are detected while processing the
349  * option, the error text is intended for the end user.
350  *
351  * These option handlers have similar functionality, differing in option processing and
352  * error message generation. fieldVsNumberOptionHandler is described as an example. It
 * handles command options such as '--lt 3:1000', which tests field 3 for a value less
 * than 1000. It is passed the tests array, the 'numLT' function to use for the test, and
355  * the string "3:1000" representing the option value. It parses the option value into
356  * field index (unsigned int) and value (double). These are wrapped in a FieldsPredicate
357  * which is added to the tests array. An error is signaled if the option string is invalid.
358  *
359  * During processing, fields indexes are converted from one-based to zero-based. As an
360  * optimization, the maximum field index is also tracked. This allows early termination of
361  * line splitting.
362  */
/* Option handler for single-field tests (FieldUnaryPredicate). 'optionVal' is parsed as
 * a field list; one test delegate is appended to 'tests' per field, and 'maxFieldIndex'
 * is raised to the largest zero-based index seen. Any exception raised while doing so
 * is rethrown with its message wrapped in option-specific text for the end user.
 */
void fieldUnaryOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldUnaryPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    try
    {
        foreach (fieldIndex; optionVal.parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldUnaryDelegate(fn, fieldIndex);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format("[--%s %s]. %s\n   Expected: '--%s <field>' or '--%s <field-list>'.",
                       option, optionVal, e.msg, option, option);
        throw e;
    }
}
384 
/* Option handler for field vs number tests (FieldVsNumberPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>'. The text after the first ':' is
 * converted to a double; the text before it is parsed as a field list. One test delegate
 * is appended to 'tests' per field, and 'maxFieldIndex' is raised to the largest
 * zero-based index seen.
 *
 * Throws an Exception with end-user text if the ':' or the value is missing, if the
 * value is not convertible to a double, or if processing the field list fails.
 *
 * The "Expected:" text in the error messages closes the quote after
 * '<field-list>:<val>', consistent with the other option handlers.
 */
void fieldVsNumberOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsNumberPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    /* Builds the message for a malformed option value. 'errorMessage', if non-empty,
     * is inserted (preceded by a space) ahead of the "Expected:" text.
     */
    string invalidOptionMsg(string errorMessage = "")
    {
        immutable optionalSpace = (errorMessage.length == 0) ? "" : " ";
        return format(
            "Invalid option: '--%s %s'.%s%s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, optionalSpace, errorMessage, option, option);
    }

    immutable valSplit = findSplit(optionVal, ":");

    /* Both the ':' separator and some text after it are required. */
    enforce(valSplit[1].length != 0 && valSplit[2].length != 0, invalidOptionMsg());

    double value;
    try
    {
        value = valSplit[2].to!double;
    }
    catch (Exception e)
    {
        throw new Exception(invalidOptionMsg(e.msg));
    }

    try
    {
        foreach (fieldIndex; valSplit[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsNumberDelegate(fn, fieldIndex, value);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
429 
/* Option handler for field vs string tests (FieldVsStringPredicate). 'optionVal' has the
 * form '<field-list>:<val>'. The text after the first ':' is the comparison string; the
 * text before it is parsed as a field list. One test delegate is appended to 'tests'
 * per field, and 'maxFieldIndex' is raised to the largest zero-based index seen.
 * Errors are reported as exceptions carrying end-user text.
 */
void fieldVsStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsStringPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasValue = parts[2].length != 0;

    enforce(hasSeparator && hasValue,
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
                   option, optionVal, option, option));

    string value = parts[2];

    try
    {
        foreach (fieldIndex; parts[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsStringDelegate(fn, fieldIndex, value);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
460 
/* Option handler for case-insensitive field vs string tests (FieldVsIStringPredicate).
 *
 * 'optionVal' has the form '<field-list>:<val>'. The text after the first ':' is
 * converted to a dstring and lower-cased, on the assumption that the predicate compares
 * it against lower-cased field text. The text before the ':' is parsed as a field list.
 * One test delegate is appended to 'tests' per field, and 'maxFieldIndex' is raised to
 * the largest zero-based index seen.
 *
 * Throws an Exception with end-user text if the ':' or the value is missing, or if
 * converting the value or processing the field list fails.
 */
void fieldVsIStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsIStringPredicate fn, string option, string optionVal)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable valSplit = findSplit(optionVal, ":");

    enforce(valSplit[1].length != 0 && valSplit[2].length != 0,
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
                   option, optionVal, option, option));

    try
    {
        /* Lower-case the value once and share it across all fields in the list. This is
         * kept inside the try block so a conversion failure gets the same message
         * wrapping as a field list failure.
         */
        immutable dstring loweredValue = valSplit[2].to!dstring.toLower;

        foreach (fieldIndex; valSplit[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsIStringDelegate(fn, fieldIndex, loweredValue);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
494 
/* Option handler for field vs regex tests (FieldVsRegexPredicate). 'optionVal' has the
 * form '<field-list>:<val>'. The text after the first ':' is compiled as a regular
 * expression, with the "i" modifier when 'caseSensitive' is false. The text before the
 * ':' is parsed as a field list. One test delegate is appended to 'tests' per field,
 * and 'maxFieldIndex' is raised to the largest zero-based index seen. Errors are
 * reported as exceptions carrying end-user text.
 */
void fieldVsRegexOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsRegexPredicate fn, string option, string optionVal,
    bool caseSensitive)
{
    import std.typecons : Yes;
    import tsv_utils.common.utils : parseFieldList;

    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasPattern = parts[2].length != 0;

    enforce(hasSeparator && hasPattern,
            format("Invalid option: '--%s %s'.\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
                   option, optionVal, option, option));

    Regex!char compiledRegex;
    try
    {
        compiledRegex = regex(parts[2], caseSensitive ? "" : "i");
    }
    catch (Exception e)
    {
        throw new Exception(
            format("Invalid regular expression: '--%s %s'. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
                   option, optionVal, e.msg, option, option));
    }

    try
    {
        foreach (fieldIndex; parts[0].parseFieldList!(size_t, Yes.convertToZeroBasedIndex))
        {
            tests ~= makeFieldVsRegexDelegate(fn, fieldIndex, compiledRegex);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
537 
/* Option handler for field vs field tests (FieldVsFieldPredicate). 'optionVal' has the
 * form '<field1>:<field2>', both one-based field numbers. The fields must be non-zero
 * and different from each other. A single test delegate using the zero-based indexes
 * is appended to 'tests', and 'maxFieldIndex' is raised if either index exceeds it.
 * Errors are reported as exceptions carrying end-user text.
 */
void fieldVsFieldOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldVsFieldPredicate fn, string option, string optionVal)
{
    immutable parts = findSplit(optionVal, ":");
    immutable bool hasSeparator = parts[1].length != 0;
    immutable bool hasSecondField = parts[2].length != 0;

    enforce(hasSeparator && hasSecondField,
            format("Invalid option: '--%s %s'. Expected: '--%s <field1>:<field2>' where fields are 1-upped integers.",
                   option, optionVal, option));

    size_t field1;
    size_t field2;
    try
    {
        field1 = to!size_t(parts[0]);
        field2 = to!size_t(parts[2]);
    }
    catch (Exception e)
    {
        throw new Exception(
            format("Invalid values in option: '--%s %s'. Expected: '--%s <field1>:<field2>' where fields are 1-upped integers.",
                   option, optionVal, option));
    }

    enforce(!(field1 == 0 || field2 == 0),
            format("Invalid option: '--%s %s'. Zero is not a valid field index.", option, optionVal));

    enforce(field1 != field2,
            format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

    /* Field numbers on the command line are one-based; the predicates use zero-based indexes. */
    immutable size_t index1 = field1 - 1;
    immutable size_t index2 = field2 - 1;

    tests ~= makeFieldVsFieldDelegate(fn, index1, index2);
    maxFieldIndex = max(maxFieldIndex, index1, index2);
}
572 
573 
/* Option handler for two-field-plus-number tests (FieldFieldNumPredicate). 'optionVal'
 * has the form '<field1>:<field2>:<num>', where the fields are one-based field numbers
 * and <num> is convertible to a double. The fields must be non-zero and different from
 * each other. A single test delegate using the zero-based indexes is appended to
 * 'tests', and 'maxFieldIndex' is raised if either index exceeds it. Errors are
 * reported as exceptions carrying end-user text.
 */
void fieldFieldNumOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, FieldFieldNumPredicate fn, string option, string optionVal)
{
    size_t field1;
    size_t field2;
    double value;
    bool parsedOk = false;

    /* Split at the first ':' and then split the remainder at the next ':'. */
    immutable outerParts = findSplit(optionVal, ":");
    if (outerParts[1].length != 0 && outerParts[2].length != 0)
    {
        immutable innerParts = findSplit(outerParts[2], ":");
        if (innerParts[1].length != 0 && innerParts[2].length != 0)
        {
            try
            {
                field1 = to!size_t(outerParts[0]);
                field2 = to!size_t(innerParts[0]);
                value = to!double(innerParts[2]);
                parsedOk = true;
            }
            catch (Exception)
            {
                parsedOk = false;
            }
        }
    }

    enforce(parsedOk,
            format("Invalid values in option: '--%s %s'. Expected: '--%s <field1>:<field2>:<num>' where fields are 1-upped integers.",
                   option, optionVal, option));

    enforce(!(field1 == 0 || field2 == 0),
            format("Invalid option: '--%s %s'. Zero is not a valid field index.", option, optionVal));

    enforce(field1 != field2,
            format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

    /* Field numbers on the command line are one-based; the predicates use zero-based indexes. */
    immutable size_t index1 = field1 - 1;
    immutable size_t index2 = field2 - 1;

    tests ~= makeFieldFieldNumDelegate(fn, index1, index2, value);
    maxFieldIndex = max(maxFieldIndex, index1, index2);
}
618 
/** Command line options - Holds the results of command line option processing.
 *
 * The struct is filled in by its processArgs method, which parses the command line,
 * records the flag settings and input sources, and builds the list of tests.
 */
struct TsvFilterOptions
{
    import tsv_utils.common.utils : inputSourceRange, InputSourceRange, ReadHeader;

    string programName;
    InputSourceRange inputSources;   // Input files
    FieldsPredicate[] tests;         // Appended to by the test option handlers
    size_t maxFieldIndex;            // Updated by the test option handlers
    bool hasHeader = false;          // --H|header
    bool invert = false;             // --invert
    bool disjunct = false;           // --or
    char delim = '\t';               // --delimiter
    bool helpVerbose = false;        // --help-verbose
    bool helpOptions = false;        // --help-options
    bool versionWanted = false;      // --V|version

    /* Parses the command line and populates this struct.
     *
     * Returns a two element tuple. The first element is true when the arguments were
     * processed successfully and the program should keep going. It is false when an
     * error occurred or when help or version output was requested. When it is false,
     * the second element is the exit code to use (0 or 1).
     *
     * A true result means the arguments were validated and the tests array is ready.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;
        import tsv_utils.common.getopt_inorder;

        if (cmdArgs.length > 0) programName = cmdArgs[0].stripExtension.baseName;
        else programName = "Unknown_program_name";

        /* Option handlers - One per test option. Each has the handler signature
         * getopt requires. This keeps knowledge of the specific command option text
         * separate from the shared option processing routines they forward to.
         */

        /* Unary tests on field content. */
        void handlerFldEmpty(string opt, string val)    { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldEmpty, opt, val); }
        void handlerFldNotEmpty(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldNotEmpty, opt, val); }
        void handlerFldBlank(string opt, string val)    { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldBlank, opt, val); }
        void handlerFldNotBlank(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldNotBlank, opt, val); }

        /* Unary tests on numeric interpretation. */
        void handlerFldIsNumeric(string opt, string val)  { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsNumeric, opt, val); }
        void handlerFldIsFinite(string opt, string val)   { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsFinite, opt, val); }
        void handlerFldIsNaN(string opt, string val)      { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsNaN, opt, val); }
        void handlerFldIsInfinity(string opt, string val) { fieldUnaryOptionHandler(tests, maxFieldIndex, &fldIsInfinity, opt, val); }

        /* Field vs number comparisons. */
        void handlerNumLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numLE, opt, val); }
        void handlerNumLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numLT, opt, val); }
        void handlerNumGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numGE, opt, val); }
        void handlerNumGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numGT, opt, val); }
        void handlerNumEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numEQ, opt, val); }
        void handlerNumNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &numNE, opt, val); }

        /* Field vs string comparisons. */
        void handlerStrLE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strLE, opt, val); }
        void handlerStrLT(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strLT, opt, val); }
        void handlerStrGE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strGE, opt, val); }
        void handlerStrGT(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strGT, opt, val); }
        void handlerStrEQ(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strEQ, opt, val); }
        void handlerStrNE(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strNE, opt, val); }

        /* Substring tests. */
        void handlerStrInFld(string opt, string val)    { fieldVsStringOptionHandler(tests, maxFieldIndex, &strInFld, opt, val); }
        void handlerStrNotInFld(string opt, string val) { fieldVsStringOptionHandler(tests, maxFieldIndex, &strNotInFld, opt, val); }

        /* Case-insensitive string tests. */
        void handlerIStrEQ(string opt, string val)       { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrEQ, opt, val); }
        void handlerIStrNE(string opt, string val)       { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrNE, opt, val); }
        void handlerIStrInFld(string opt, string val)    { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrInFld, opt, val); }
        void handlerIStrNotInFld(string opt, string val) { fieldVsIStringOptionHandler(tests, maxFieldIndex, &istrNotInFld, opt, val); }

        /* Regular expression tests. The plain variants pass true as the final argument,
         * the 'I' (case-insensitive) variants pass false.
         */
        void handlerRegexMatch(string opt, string val)     { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexMatch, opt, val, true); }
        void handlerRegexNotMatch(string opt, string val)  { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexNotMatch, opt, val, true); }
        void handlerIRegexMatch(string opt, string val)    { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexMatch, opt, val, false); }
        void handlerIRegexNotMatch(string opt, string val) { fieldVsRegexOptionHandler(tests, maxFieldIndex, &regexNotMatch, opt, val, false); }

        /* Character length tests. */
        void handlerCharLenLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenLE, opt, val); }
        void handlerCharLenLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenLT, opt, val); }
        void handlerCharLenGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenGE, opt, val); }
        void handlerCharLenGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenGT, opt, val); }
        void handlerCharLenEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenEQ, opt, val); }
        void handlerCharLenNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &charLenNE, opt, val); }

        /* Byte length tests. */
        void handlerByteLenLE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenLE, opt, val); }
        void handlerByteLenLT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenLT, opt, val); }
        void handlerByteLenGE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenGE, opt, val); }
        void handlerByteLenGT(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenGT, opt, val); }
        void handlerByteLenEQ(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenEQ, opt, val); }
        void handlerByteLenNE(string opt, string val) { fieldVsNumberOptionHandler(tests, maxFieldIndex, &byteLenNE, opt, val); }

        /* Field vs field numeric comparisons. */
        void handlerFFLE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffLE, opt, val); }
        void handlerFFLT(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffLT, opt, val); }
        void handlerFFGE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffGE, opt, val); }
        void handlerFFGT(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffGT, opt, val); }
        void handlerFFEQ(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffEQ, opt, val); }
        void handlerFFNE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffNE, opt, val); }

        /* Field vs field string comparisons. */
        void handlerFFStrEQ(string opt, string val)  { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffStrEQ, opt, val); }
        void handlerFFStrNE(string opt, string val)  { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffStrNE, opt, val); }
        void handlerFFIStrEQ(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffIStrEQ, opt, val); }
        void handlerFFIStrNE(string opt, string val) { fieldVsFieldOptionHandler(tests, maxFieldIndex, &ffIStrNE, opt, val); }

        /* Field vs field difference tests, taking an additional number. */
        void handlerFFAbsDiffLE(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffAbsDiffLE, opt, val); }
        void handlerFFAbsDiffGT(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffAbsDiffGT, opt, val); }
        void handlerFFRelDiffLE(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffRelDiffLE, opt, val); }
        void handlerFFRelDiffGT(string opt, string val) { fieldFieldNumOptionHandler(tests, maxFieldIndex, &ffRelDiffGT, opt, val); }

        try
        {
            /* Values within a single command line option are comma separated. */
            arraySep = ",";

            /* The case sensitivity config entries apply to the options following them.
             * They are toggled around -V, -H and -v.
             */
            auto getoptResult = getoptInorder(
                cmdArgs,
                "help-verbose",    "     Print full help.", &helpVerbose,
                "help-options",    "     Print the options list by itself.", &helpOptions,
                std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                "H|header",        "     Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "or",              "     Evaluate tests as an OR rather than an AND.", &disjunct,
                std.getopt.config.caseSensitive,
                "v|invert",        "     Invert the filter, printing lines that do not match.", &invert,
                std.getopt.config.caseInsensitive,
                "d|delimiter",     "CHR  Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,

                "empty",           "<field-list>       True if FIELD is empty.", &handlerFldEmpty,
                "not-empty",       "<field-list>       True if FIELD is not empty.", &handlerFldNotEmpty,
                "blank",           "<field-list>       True if FIELD is empty or all whitespace.", &handlerFldBlank,
                "not-blank",       "<field-list>       True if FIELD contains a non-whitespace character.", &handlerFldNotBlank,

                "is-numeric",      "<field-list>       True if FIELD is interpretable as a number.", &handlerFldIsNumeric,
                "is-finite",       "<field-list>       True if FIELD is interpretable as a number and is not NaN or infinity.", &handlerFldIsFinite,
                "is-nan",          "<field-list>       True if FIELD is NaN.", &handlerFldIsNaN,
                "is-infinity",     "<field-list>       True if FIELD is infinity.", &handlerFldIsInfinity,

                "le",              "<field-list>:NUM   FIELD <= NUM (numeric).", &handlerNumLE,
                "lt",              "<field-list>:NUM   FIELD <  NUM (numeric).", &handlerNumLT,
                "ge",              "<field-list>:NUM   FIELD >= NUM (numeric).", &handlerNumGE,
                "gt",              "<field-list>:NUM   FIELD >  NUM (numeric).", &handlerNumGT,
                "eq",              "<field-list>:NUM   FIELD == NUM (numeric).", &handlerNumEQ,
                "ne",              "<field-list>:NUM   FIELD != NUM (numeric).", &handlerNumNE,

                "str-le",          "<field-list>:STR   FIELD <= STR (string).", &handlerStrLE,
                "str-lt",          "<field-list>:STR   FIELD <  STR (string).", &handlerStrLT,
                "str-ge",          "<field-list>:STR   FIELD >= STR (string).", &handlerStrGE,
                "str-gt",          "<field-list>:STR   FIELD >  STR (string).", &handlerStrGT,
                "str-eq",          "<field-list>:STR   FIELD == STR (string).", &handlerStrEQ,
                "istr-eq",         "<field-list>:STR   FIELD == STR (string, case-insensitive).", &handlerIStrEQ,
                "str-ne",          "<field-list>:STR   FIELD != STR (string).", &handlerStrNE,
                "istr-ne",         "<field-list>:STR   FIELD != STR (string, case-insensitive).", &handlerIStrNE,
                "str-in-fld",      "<field-list>:STR   FIELD contains STR (substring search).", &handlerStrInFld,
                "istr-in-fld",     "<field-list>:STR   FIELD contains STR (substring search, case-insensitive).", &handlerIStrInFld,
                "str-not-in-fld",  "<field-list>:STR   FIELD does not contain STR (substring search).", &handlerStrNotInFld,
                "istr-not-in-fld", "<field-list>:STR   FIELD does not contain STR (substring search, case-insensitive).", &handlerIStrNotInFld,

                "regex",           "<field-list>:REGEX   FIELD matches regular expression.", &handlerRegexMatch,
                "iregex",          "<field-list>:REGEX   FIELD matches regular expression, case-insensitive.", &handlerIRegexMatch,
                "not-regex",       "<field-list>:REGEX   FIELD does not match regular expression.", &handlerRegexNotMatch,
                "not-iregex",      "<field-list>:REGEX   FIELD does not match regular expression, case-insensitive.", &handlerIRegexNotMatch,

                "char-len-le",     "<field-list>:NUM   character-length(FIELD) <= NUM.", &handlerCharLenLE,
                "char-len-lt",     "<field-list>:NUM   character-length(FIELD) < NUM.", &handlerCharLenLT,
                "char-len-ge",     "<field-list>:NUM   character-length(FIELD) >= NUM.", &handlerCharLenGE,
                "char-len-gt",     "<field-list>:NUM   character-length(FIELD) > NUM.", &handlerCharLenGT,
                "char-len-eq",     "<field-list>:NUM   character-length(FIELD) == NUM.", &handlerCharLenEQ,
                "char-len-ne",     "<field-list>:NUM   character-length(FIELD) != NUM.", &handlerCharLenNE,

                "byte-len-le",     "<field-list>:NUM   byte-length(FIELD) <= NUM.", &handlerByteLenLE,
                "byte-len-lt",     "<field-list>:NUM   byte-length(FIELD) < NUM.", &handlerByteLenLT,
                "byte-len-ge",     "<field-list>:NUM   byte-length(FIELD) >= NUM.", &handlerByteLenGE,
                "byte-len-gt",     "<field-list>:NUM   byte-length(FIELD) > NUM.", &handlerByteLenGT,
                "byte-len-eq",     "<field-list>:NUM   byte-length(FIELD) == NUM.", &handlerByteLenEQ,
                "byte-len-ne",     "<field-list>:NUM   byte-length(FIELD) != NUM.", &handlerByteLenNE,

                "ff-le",           "FIELD1:FIELD2   FIELD1 <= FIELD2 (numeric).", &handlerFFLE,
                "ff-lt",           "FIELD1:FIELD2   FIELD1 <  FIELD2 (numeric).", &handlerFFLT,
                "ff-ge",           "FIELD1:FIELD2   FIELD1 >= FIELD2 (numeric).", &handlerFFGE,
                "ff-gt",           "FIELD1:FIELD2   FIELD1 >  FIELD2 (numeric).", &handlerFFGT,
                "ff-eq",           "FIELD1:FIELD2   FIELD1 == FIELD2 (numeric).", &handlerFFEQ,
                "ff-ne",           "FIELD1:FIELD2   FIELD1 != FIELD2 (numeric).", &handlerFFNE,
                "ff-str-eq",       "FIELD1:FIELD2   FIELD1 == FIELD2 (string).", &handlerFFStrEQ,
                "ff-istr-eq",      "FIELD1:FIELD2   FIELD1 == FIELD2 (string, case-insensitive).", &handlerFFIStrEQ,
                "ff-str-ne",       "FIELD1:FIELD2   FIELD1 != FIELD2 (string).", &handlerFFStrNE,
                "ff-istr-ne",      "FIELD1:FIELD2   FIELD1 != FIELD2 (string, case-insensitive).", &handlerFFIStrNE,

                "ff-absdiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) <= NUM", &handlerFFAbsDiffLE,
                "ff-absdiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2)  > NUM", &handlerFFAbsDiffGT,
                "ff-reldiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2)) <= NUM", &handlerFFRelDiffLE,
                "ff-reldiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2))  > NUM", &handlerFFRelDiffGT,
                );

            /* Help and version requests stop processing with exit code zero. They are
             * checked in priority order: regular help, verbose help, options-only help,
             * then version. Regular help prints only the text, without the options list,
             * as the text already summarizes the options.
             */
            if (getoptResult.helpWanted)
            {
                stdout.write(helpText);
                return tuple(false, 0);
            }

            if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, getoptResult.options);
                return tuple(false, 0);
            }

            if (helpOptions)
            {
                defaultGetoptPrinter(helpTextOptions, getoptResult.options);
                return tuple(false, 0);
            }

            if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-filter"));
                return tuple(false, 0);
            }

            /* Whatever is left after the program name is the list of input files.
             * With no files given, "-" is used as the only input.
             */
            string[] filepaths;
            if (cmdArgs.length > 1) filepaths = cmdArgs[1 .. $];
            else filepaths = ["-"];

            cmdArgs.length = 1;

            ReadHeader headerFlag = hasHeader ? Yes.readHeader : No.readHeader;
            inputSources = inputSourceRange(filepaths, headerFlag);
        }
        catch (Exception e)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, e.msg);
            return tuple(false, 1);
        }

        return tuple(true, 0);
    }
}
847 
/** tsvFilter reads every input source line by line, runs the configured tests against
 * the fields of each line, and writes the lines that qualify to standard output.
 *
 * Params:
 *   cmdopt = Processed command line options: input sources, tests, and flags.
 *
 * Throws: Exception if a line has fewer fields than the tests need, or if a test or
 * the output step throws while a line is being processed. In the latter case the
 * message is extended with the file name and line number.
 */
void tsvFilter(ref TsvFilterOptions cmdopt)
{
    import std.algorithm : all, any, splitter;
    import std.range;
    import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, InputSourceRange,
        throwIfWindowsNewlineOnUnix;

    /* The input source range must hold at least one source (stdin if nothing else). */
    assert(!cmdopt.inputSources.empty);
    static assert(is(typeof(cmdopt.inputSources) == InputSourceRange));

    /* Output is buffered, which helps on narrow files where most lines are written.
     * To stay responsive when matches are rare, the buffer is flushed explicitly when
     * a match follows a long run of input lines without a flush. The counter starts
     * above the limit so the very first matched line is flushed right away.
     */
    enum maxInputLinesWithoutBufferFlush = 1024;
    size_t linesSinceFlush = maxInputLinesWithoutBufferFlush + 1;

    auto output = BufferedOutputRange!(typeof(stdout))(stdout);

    /* The header of the first input was read during command line argument processing.
     * Write it out if there is one.
     */
    if (cmdopt.hasHeader && !cmdopt.inputSources.front.isHeaderEmpty)
    {
        auto firstInput = cmdopt.inputSources.front;
        throwIfWindowsNewlineOnUnix(firstInput.header, firstInput.name, 1);
        output.appendln(firstInput.header);
    }

    /* Body lines are numbered from 2 when a header occupies line 1. */
    immutable size_t fileBodyStartLine = cmdopt.hasHeader ? 2 : 1;

    /* Holds the leading fields of the current line, as many as the tests need. */
    auto lineFields = new char[][](cmdopt.maxFieldIndex + 1);

    foreach (inputStream; cmdopt.inputSources)
    {
        if (cmdopt.hasHeader)
        {
            throwIfWindowsNewlineOnUnix(inputStream.header, inputStream.name, 1);
        }

        foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine))
        {
            if (lineNum == 1)
            {
                throwIfWindowsNewlineOnUnix(line, inputStream.name, lineNum);
            }

            /* Fill lineFields from the front of the line, stopping once the highest
             * field index needed has been stored.
             */
            int lastFieldIndex = -1;
            foreach (fieldValue; line.splitter(cmdopt.delim))
            {
                if (lastFieldIndex == cast(long) cmdopt.maxFieldIndex) break;
                lineFields[++lastFieldIndex] = fieldValue;
            }

            if (lastFieldIndex == -1)
            {
                /* Work-around for splitter producing no elements on an empty line.
                 *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
                 *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
                 * The line is an empty string, so use it as the single field.
                 */
                assert(line.length == 0);
                lastFieldIndex = 0;
                lineFields[0] = line;
            }

            enforce(lastFieldIndex >= cast(long) cmdopt.maxFieldIndex,
                    format("Not enough fields in line. File: %s, Line: %s",
                           inputStream.name, lineNum));

            /* Evaluate the tests. A test throws if a field cannot be converted to the
             * type it expects.
             */
            try
            {
                linesSinceFlush++;

                bool matched;
                if (cmdopt.disjunct) matched = cmdopt.tests.any!(test => test(lineFields));
                else matched = cmdopt.tests.all!(test => test(lineFields));

                /* With --invert the lines written are the ones that did not match. */
                immutable bool writeLine = (matched != cmdopt.invert);

                if (writeLine)
                {
                    bool flushed = output.appendln(line);
                    if (!flushed && linesSinceFlush > maxInputLinesWithoutBufferFlush)
                    {
                        output.flush;
                        flushed = true;
                    }
                    if (flushed) linesSinceFlush = 0;
                }
            }
            catch (Exception e)
            {
                /* Line 1 can only fail here when no header was requested, so suggest it. */
                string headerHint = "";
                if (lineNum == 1) headerHint = "\n  Is this a header line? Use --header to skip.";

                throw new Exception(
                    format("Could not process line or field: %s\n  File: %s Line: %s%s",
                           e.msg, inputStream.name, lineNum, headerHint));
            }
        }
    }
}