1 /**
2 Command line tool that filters TSV files.
3 
4 This tool filters tab-delimited files based on numeric or string comparisons
5 against specific fields. See the helpText string for details.
6 
7 Copyright (c) 2015-2020, eBay Inc.
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.tsv_filter;
13 
14 import std.algorithm : canFind, equal, findSplit, max, min;
15 import std.conv : to;
16 import std.exception : enforce;
17 import std.format : format;
18 import std.math : abs, isFinite, isInfinity, isNaN;
19 import std.range;
20 import std.regex;
21 import std.stdio;
22 import std..string : isNumeric;
23 import std.typecons;
24 import std.uni: asLowerCase, toLower, byGrapheme;
25 
26 /* The program has two main parts, command line arg processing and processing the input
27  * files. Much of the work is in command line arg processing. This sets up the tests run
28  * against each input line. The tests are an array of delegates (closures) run against the
29  * fields in the line. The tests are based on command line arguments, of which there is
30  * a lengthy set, one for each test.
31  */
32 
/* Embedded D runtime options (picked up by druntime via the 'rt_options' symbol).
 * "gcopt=cleanup:none" asks the garbage collector to skip its cleanup pass at
 * program termination. Only declared for compiler front-end versions >= 2.085.
 * NOTE(review): semantics per the druntime GC configuration docs - confirm the
 * 2085 version cutoff against the compilers supported by the build.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
34 
/** Program entry point.
 *
 * Processes the command line arguments, then hands the resulting options to
 * tsvFilter. Returns the exit code supplied by argument processing when it
 * indicates the program should not continue, 1 if tsvFilter throws (the error is
 * written to stderr), and 0 otherwise.
 */
int main(string[] cmdArgs)
{
    /* When running in DMD code coverage mode, turn on report merging. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    TsvFilterOptions cmdopt;
    const argsResult = cmdopt.processArgs(cmdArgs);

    /* argsResult[0] says whether to continue; argsResult[1] is the exit code if not. */
    if (!argsResult[0])
    {
        return argsResult[1];
    }

    version(LDC_Profile)
    {
        import ldc.profile : resetAll;
        resetAll();
    }

    try
    {
        tsvFilter(cmdopt);
    }
    catch (Exception e)
    {
        stderr.writefln("Error [%s]: %s", cmdopt.programName, e.msg);
        return 1;
    }

    return 0;
}
63 
/* Short-form help text: synopsis, global options, and a syntax/example summary for
 * each family of test operators. Every example must use an operator that appears in
 * the corresponding 'Syntax' line.
 */
immutable helpText = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter tab-delimited files for matching lines via comparison tests against
individual fields. Use '--help-verbose' for a more detailed description.

Fields are specified using field number or field name. Field names require
that the input file has a header line. Use '--help-fields' for details.

Global options:
  --help-verbose      Print full help.
  --help-options      Print the options list by itself.
  --help-fields       Print help on specifying fields.
  --V|version         Print version information and exit.
  --H|header          Treat the first line of each file as a header.
  --or                Evaluate tests as an OR rather than an AND clause.
  --v|invert          Invert the filter, printing lines that do not match.
  --d|delimiter CHR   Field delimiter. Default: TAB.

Operators:
* Test if a field is empty (no characters) or blank (empty or whitespace only).
  Syntax:  --empty|not-empty|blank|not-blank  FIELD
  Example: --empty name               # True if the 'name' field is empty

* Test if a field is numeric, finite, NaN, or infinity
  Syntax:  --is-numeric|is-finite|is-nan|is-infinity FIELD
  Example: --is-numeric 5 --gt 5:100  # Ensure field 5 is numeric before --gt test.

* Compare a field to a number (integer or float)
  Syntax:  --eq|ne|lt|le|gt|ge  FIELD:NUM
  Example: --lt size:1000 --gt weight:0.5  # ('size' < 1000) and ('weight' > 0.5)

* Compare a field to a string
  Syntax:  --str-eq|str-ne|istr-eq|istr-ne  FIELD:STR
  Example: --str-eq color:red         # True if 'color' field is "red"

* Test if a field contains a string (substring search)
  Syntax:  --str-in-fld|str-not-in-fld|istr-in-fld|istr-not-in-fld  FIELD:STR
  Example: --str-in-fld color:dark    # True if 'color' field contains "dark"

* Test if a field matches a regular expression.
  Syntax:  --regex|iregex|not-regex|not-iregex  FIELD:REGEX
  Example: --regex '3:ab*c'     # True if field 3 contains "ac", "abc", "abbc", etc.

* Test a field's character or byte length
  Syntax:  --char-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
           --byte-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
  Example: --char-len-lt 2:10   # True if field 2 is less than 10 characters long.
           --byte-len-gt 2:10   # True if field 2 is greater than 10 bytes long.

* Field to field comparisons - Similar to field vs literal comparisons, but field vs field.
  Syntax:  --ff-eq|ff-ne|ff-lt|ff-le|ff-gt|ff-ge  FIELD1:FIELD2
           --ff-str-eq|ff-str-ne|ff-istr-eq|ff-istr-ne  FIELD1:FIELD2
  Example: --ff-eq 2:4          # True if fields 2 and 4 are numerically equivalent
           --ff-str-eq 2:4      # True if fields 2 and 4 are the same strings

* Field to field difference comparisons - Absolute and relative difference
  Syntax:  --ff-absdiff-le|ff-absdiff-gt FIELD1:FIELD2:NUM
           --ff-reldiff-le|ff-reldiff-gt FIELD1:FIELD2:NUM
  Example: --ff-absdiff-le 1:3:0.25   # True if abs(field 1 - field 3) <= 0.25

EOS";
126 
/* Long-form help text: an overview of the test syntax with worked examples, the list
 * of available test categories, and behavioral details. The text ends with an
 * 'Options:' heading; the option descriptions themselves are not part of this string.
 */
immutable helpTextVerbose = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter lines of tab-delimited files via comparison tests against fields.
Multiple tests can be specified, by default they are evaluated as an AND
clause. Lines satisfying the tests are written to standard output.

Typical test syntax is '--op field:value', where 'op' is an operator,
'field' is either a field name or a field number, and 'value' is the
comparison basis. For example, '--lt length:500' tests if the 'length'
field is less than 500. A more complete example:

  tsv-filter --header --gt length:50 --lt length:100 --le width:200 data.tsv

This outputs all lines from file data.tsv where the 'length' field is
greater than 50 and less than 100, and the 'width' field is less than or
equal to 200. The header line is also output.

Field numbers can also be used to identify fields, and must be used when
the input file doesn't have a header line. For example:

  tsv-filter --gt 1:50 --lt 1:100 --le 2:200 data.tsv

Field lists can be used to specify multiple fields at once. For example:

  tsv-filter --not-blank 1-10 --str-ne 1,2,5:'--' data.tsv

tests that fields 1-10 are not blank and fields 1,2,5 are not "--".

Wildcarded field names can also be used to specify multiple fields. The
following finds lines where any field name ending in '*_id' is empty:

  tsv-filter -H --or --empty '*_id'

Use '--help-fields' for details on using field names.

Tests available include:
  * Test if a field is empty (no characters) or blank (empty or whitespace only).
  * Test if a field is interpretable as a number, a finite number, NaN, or Infinity.
  * Compare a field to a number - Numeric equality and relational tests.
  * Compare a field to a string - String equality and relational tests.
  * Test if a field matches a regular expression. Case sensitive or insensitive.
  * Test if a field contains a string. Sub-string search, case sensitive or insensitive.
  * Test a field's character or byte length.
  * Field to field comparisons - Similar to the other tests, except comparing
    one field to another in the same line.

Details:
  * The run is aborted if there are not enough fields in an input line.
  * Numeric tests will fail and abort the run if a field cannot be interpreted as a
    number. This includes fields with no text. To avoid this use '--is-numeric' or
    '--is-finite' prior to the numeric test. For example, '--is-numeric 5 --gt 5:100'
    ensures field 5 is numeric before running the --gt test.
  * Regular expression syntax is defined by the D programming language. They follow
    common conventions (perl, python, etc.). Most common forms work as expected.

Options:
EOS";
185 
/* Minimal help header: just the synopsis line and an 'Options:' heading. The option
 * descriptions are not part of this string.
 * NOTE(review): presumably printed ahead of the generated options list for
 * '--help-options' - confirm against the code that uses it.
 */
immutable helpTextOptions = q"EOS
Synopsis: tsv-filter [options] [file...]

Options:
EOS";
191 
192 /* The next blocks of code define the structure of the boolean tests run against input lines.
193  * This includes function and delegate (closure) signatures, creation mechanisms, option
194  * handlers, etc. Command line arg processing to build the test structure.
195 */
196 
/* FieldsPredicate delegate signature - Each input line is run against a set of boolean
 * tests. Each test is a 'FieldsPredicate'. A FieldsPredicate is a delegate (closure)
 * containing all info about the test except the field values of the line being tested.
 * These delegates are created as part of command line arg processing. The wrapped data
 * includes operation, field indexes, literal values, etc. At run-time the delegate is
 * passed one argument, the split input line.
 */
alias FieldsPredicate = bool delegate(const char[][] fields);

/* FieldsPredicate function signatures - These aliases represent the different function
 * signatures used in FieldsPredicate delegates. Each alias has a corresponding 'make'
 * function. The 'make' function takes a real predicate function and closure args and
 * returns a FieldsPredicate delegate. Predicate types are:
 *
 * - FieldUnaryPredicate - Test based on a single field. (e.g. --empty 4)
 * - FieldVsNumberPredicate - Test based on a field index (used to get the field value)
 *   and a fixed numeric value. For example, field 2 less than 100 (--lt 2:100).
 * - FieldVsStringPredicate - Test based on a field and a string. (e.g. --str-eq 2:abc)
 * - FieldVsIStringPredicate - Case-insensitive test based on a field and a string.
 *   (e.g. --istr-eq 2:abc)
 * - FieldVsRegexPredicate - Test based on a field and a regex. (e.g. --regex '2:ab*c')
 * - FieldVsFieldPredicate - Test based on two fields. (e.g. --ff-le 2:4).
 * - FieldFieldNumPredicate - Test based on two fields and a fixed numeric value.
 *   (e.g. --ff-absdiff-le 2:4:0.5)
 *
 * An actual FieldsPredicate takes the fields from the line and the closure args and
 * runs the test. For example, a function testing if a field is less than a specific
 * value would pull the specified field from the fields array, convert the string to
 * a number, then run the less-than test.
 */
alias FieldUnaryPredicate    = bool function(const char[][] fields, size_t index);
alias FieldVsNumberPredicate = bool function(const char[][] fields, size_t index, double value);
alias FieldVsStringPredicate = bool function(const char[][] fields, size_t index, string value);
alias FieldVsIStringPredicate = bool function(const char[][] fields, size_t index, dstring value);
alias FieldVsRegexPredicate  = bool function(const char[][] fields, size_t index, Regex!char value);
alias FieldVsFieldPredicate  = bool function(const char[][] fields, size_t index1, size_t index2);
alias FieldFieldNumPredicate  = bool function(const char[][] fields, size_t index1, size_t index2, double value);
232 
/* 'make' functions - Each one binds a predicate function together with its closure
 * args (field indexes and, where applicable, a comparison value) and returns a
 * FieldsPredicate delegate that only needs the fields of the line being tested.
 */

/* Binds a single-field predicate to its field index. */
FieldsPredicate makeFieldUnaryDelegate(FieldUnaryPredicate fn, size_t index)
{
    return delegate bool (const char[][] fields) { return fn(fields, index); };
}

/* Binds a field-vs-number predicate to its field index and numeric value. */
FieldsPredicate makeFieldVsNumberDelegate(FieldVsNumberPredicate fn, size_t index, double value)
{
    return delegate bool (const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-string predicate to its field index and string value. */
FieldsPredicate makeFieldVsStringDelegate(FieldVsStringPredicate fn, size_t index, string value)
{
    return delegate bool (const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a case-insensitive field-vs-string predicate to its field index and dstring value. */
FieldsPredicate makeFieldVsIStringDelegate(FieldVsIStringPredicate fn, size_t index, dstring value)
{
    return delegate bool (const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-regex predicate to its field index and compiled regex. */
FieldsPredicate makeFieldVsRegexDelegate(FieldVsRegexPredicate fn, size_t index, Regex!char value)
{
    return delegate bool (const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-field predicate to its two field indexes. */
FieldsPredicate makeFieldVsFieldDelegate(FieldVsFieldPredicate fn, size_t index1, size_t index2)
{
    return delegate bool (const char[][] fields) { return fn(fields, index1, index2); };
}

/* Binds a two-field-plus-number predicate to its field indexes and numeric value. */
FieldsPredicate makeFieldFieldNumDelegate(FieldFieldNumPredicate fn, size_t index1, size_t index2, double value)
{
    return delegate bool (const char[][] fields) { return fn(fields, index1, index2, value); };
}
267 
/* Predicate functions - The functions from here on are the tests wrapped by
 * FieldsPredicate delegates. They mirror the operators available on the command line,
 * and each one matches one of the predicate function aliases.
 */

/* True if the field has no characters. */
bool fldEmpty(const char[][] fields, size_t index)
{
    return fields[index].empty;
}

/* True if the field has at least one character. */
bool fldNotEmpty(const char[][] fields, size_t index)
{
    return !fields[index].empty;
}

/* True if the field is empty or consists only of whitespace. */
bool fldBlank(const char[][] fields, size_t index)
{
    auto blankMatch = matchFirst(fields[index], ctRegex!`^\s*$`);
    return !blankMatch.empty;
}

/* True if the field contains at least one non-whitespace character. */
bool fldNotBlank(const char[][] fields, size_t index)
{
    auto blankMatch = matchFirst(fields[index], ctRegex!`^\s*$`);
    return blankMatch.empty;
}
276 
/* Helper for the numeric classification predicates. Returns true, with the parsed
 * number in 'value', only if the text passes std.string.isNumeric AND converts to a
 * double. std.string.isNumeric also accepts D literal forms such as "123L" or "1.5f"
 * that std.conv.to!double rejects; those are reported as non-numeric here rather than
 * raising a ConvException, so these predicates can safely guard the numeric tests.
 */
private bool fieldAsDouble(const char[] text, out double value)
{
    import std.conv : ConvException;

    if (!text.isNumeric) return false;

    try
    {
        value = text.to!double;
    }
    catch (ConvException)
    {
        return false;
    }

    return true;
}

/* True if the field can be interpreted as a number by the numeric tests. */
bool fldIsNumeric(const char[][] fields, size_t index)
{
    double unused;
    return fieldAsDouble(fields[index], unused);
}

/* True if the field is numeric and neither NaN nor infinite. */
bool fldIsFinite(const char[][] fields, size_t index)
{
    double number;
    return fieldAsDouble(fields[index], number) && number.isFinite;
}

/* True if the field is numeric and its value is NaN. */
bool fldIsNaN(const char[][] fields, size_t index)
{
    double number;
    return fieldAsDouble(fields[index], number) && number.isNaN;
}

/* True if the field is numeric and its value is positive or negative infinity. */
bool fldIsInfinity(const char[][] fields, size_t index)
{
    double number;
    return fieldAsDouble(fields[index], number) && number.isInfinity;
}
281 
/* Numeric comparisons of a field against a fixed value. The field is converted to a
 * double first; the conversion throws if the field text is not a number.
 */
bool numLE(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue <= val;
}

bool numLT(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue < val;
}

bool numGE(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue >= val;
}

bool numGT(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue > val;
}

bool numEQ(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue == val;
}

bool numNE(const char[][] fields, size_t index, double val)
{
    const fieldValue = to!double(fields[index]);
    return fieldValue != val;
}
288 
/* String comparisons of a field against a fixed string, using D's built-in array
 * comparison, plus case-sensitive substring tests.
 */
bool strLE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field <= val;
}

bool strLT(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field < val;
}

bool strGE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field >= val;
}

bool strGT(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field > val;
}

bool strEQ(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field == val;
}

bool strNE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field != val;
}

/* True if 'val' occurs somewhere in the field. */
bool strInFld(const char[][] fields, size_t index, string val)
{
    return canFind(fields[index], val);
}

/* True if 'val' does not occur anywhere in the field. */
bool strNotInFld(const char[][] fields, size_t index, string val)
{
    return !canFind(fields[index], val);
}
297 
/* Case-insensitive string predicates. The field is lower-cased on the fly and compared
 * with 'val' as-is, so 'val' must already be lower-case. fieldVsIStringOptionHandler
 * lower-cases the command line value before creating the delegate.
 */
bool istrEQ(const char[][] fields, size_t index, dstring val)
{
    return equal(asLowerCase(fields[index]), val);
}

bool istrNE(const char[][] fields, size_t index, dstring val)
{
    return !equal(asLowerCase(fields[index]), val);
}

bool istrInFld(const char[][] fields, size_t index, dstring val)
{
    return canFind(asLowerCase(fields[index]), val);
}

bool istrNotInFld(const char[][] fields, size_t index, dstring val)
{
    return !canFind(asLowerCase(fields[index]), val);
}
304 
/* Regex predicates. Case-sensitivity is a property of the compiled regex value, so the
 * same two functions serve both the case-sensitive and case-insensitive operators.
 */
bool regexMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto firstMatch = matchFirst(fields[index], val);
    return !firstMatch.empty;
}

bool regexNotMatch(const char[][] fields, size_t index, Regex!char val)
{
    auto firstMatch = matchFirst(fields[index], val);
    return firstMatch.empty;
}
310 
/* Character length tests. Length is counted in graphemes and compared against 'val'. */
bool charLenLE(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount <= val;
}

bool charLenLT(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount < val;
}

bool charLenGE(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount >= val;
}

bool charLenGT(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount > val;
}

bool charLenEQ(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount == val;
}

bool charLenNE(const char[][] fields, size_t index, double val)
{
    const graphemeCount = fields[index].byGrapheme.walkLength;
    return graphemeCount != val;
}
317 
/* Byte length tests. Length is the number of bytes (char code units) in the field. */
bool byteLenLE(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount <= val;
}

bool byteLenLT(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount < val;
}

bool byteLenGE(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount >= val;
}

bool byteLenGT(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount > val;
}

bool byteLenEQ(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount == val;
}

bool byteLenNE(const char[][] fields, size_t index, double val)
{
    const byteCount = fields[index].length;
    return byteCount != val;
}
324 
/* Field-to-field numeric comparisons. Both fields are converted to double, first
 * field first; a conversion failure throws.
 */
bool ffLE(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs <= rhs;
}

bool ffLT(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs < rhs;
}

bool ffGE(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs >= rhs;
}

bool ffGT(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs > rhs;
}

bool ffEQ(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs == rhs;
}

bool ffNE(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = to!double(fields[index1]);
    const rhs = to!double(fields[index2]);
    return lhs != rhs;
}

/* Field-to-field string comparisons, case-sensitive. */
bool ffStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = fields[index1];
    const rhs = fields[index2];
    return lhs == rhs;
}

bool ffStrNE(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = fields[index1];
    const rhs = fields[index2];
    return lhs != rhs;
}

/* Field-to-field string comparisons, case-insensitive (both sides lower-cased). */
bool ffIStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    auto lhsLower = fields[index1].asLowerCase;
    auto rhsLower = fields[index2].asLowerCase;
    return lhsLower.equal(rhsLower);
}

bool ffIStrNE(const char[][] fields, size_t index1, size_t index2)
{
    auto lhsLower = fields[index1].asLowerCase;
    auto rhsLower = fields[index2].asLowerCase;
    return !lhsLower.equal(rhsLower);
}
341 
/* Absolute difference: |v1 - v2|. */
auto AbsDiff(double v1, double v2)
{
    return abs(v1 - v2);
}

/* Relative difference: |v1 - v2| divided by the smaller of |v1| and |v2|. */
auto RelDiff(double v1, double v2)
{
    const smallerMagnitude = min(abs(v1), abs(v2));
    return abs(v1 - v2) / smallerMagnitude;
}

/* Field-to-field difference tests. Both fields are converted to double (throws if
 * either is not a number) and the absolute or relative difference is compared
 * against 'value'.
 */
bool ffAbsDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    const difference = AbsDiff(to!double(fields[index1]), to!double(fields[index2]));
    return difference <= value;
}

bool ffAbsDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    const difference = AbsDiff(to!double(fields[index1]), to!double(fields[index2]));
    return difference > value;
}

bool ffRelDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    const difference = RelDiff(to!double(fields[index1]), to!double(fields[index2]));
    return difference <= value;
}

bool ffRelDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    const difference = RelDiff(to!double(fields[index1]), to!double(fields[index2]));
    return difference > value;
}
361 
362 /* Command line option handlers - There is a command line option handler for each
363  * predicate type. That is, one each for FieldUnaryPredicate, FieldVsNumberPredicate,
364  * etc. Option handlers are passed the tests array, the predicate function, and the
365  * command line option arguments. A FieldsPredicate delegate is created and appended to
366  * the tests array. An exception is thrown if errors are detected while processing the
367  * option, the error text is intended for the end user.
368  *
369  * All the option handlers have similar functionality, differing in option processing and
370  * error message generation. fieldVsNumberOptionHandler is described as an example. It
 * handles command options such as '--le 3:1000', which tests field 3 for a value less
 * than or equal to 1000. It is passed the tests array, the 'numLE' predicate function used for the
373  * test, and the string "3:1000" representing the option value. It is also passed the
374  * header line from the first input file and an indication of whether header processing
375  * is enabled (--H|header). parseFieldList (fieldlist module) is used to parse the
376  * field-list component of the option ("3" in the example). The comparison value ("1000")
377  * is converted to a double. These are wrapped in a FieldsPredicate delegate which is
378  * added to the tests array. An error is signaled if the option string is invalid.
379  *
380  * During processing, fields indexes are converted from one-based to zero-based. As an
381  * optimization, the maximum field index is also tracked. This allows early termination of
382  * line splitting.
383  *
 * The header line from the input file is not available when std.getopt processes the
385  * command line option. The processing described above must be deferred. This is done
386  * using a 'CmdOptionHandler' delegate. There is a 'make' function for every Command line
387  * option handler that creates these. These are created during std.getopt processing.
388  * They are run when the header line becomes available.
389  *
 * The final setup for the '--le' (numeric less-than-or-equal) operator is as follows:
 *   - Function 'handlerNumLE' (in TsvFilterOptions.processArgs) is associated with the
 *     command line option "--le <val>". When called by std.getopt it creates an option
 *     handler delegate via 'makeFieldVsNumberOptionHandler'. This is appended to an
 *     array of delegates.
395  *   - 'fieldVsNumberOptionHandler' is invoked via the delegate after the header line
396  *     becomes available (in TsvFilterOptions.processArgs). If args are valid,
397  *     'makeFieldVsNumberDelegate' is used to create a delegate invoking the 'numLE'
398  *     predicate function. This delegate is added to the set of run-time tests.
399  *
400  * Note that in the above setup the 'numLE' predicate is specified in 'handlerNumLE'
401  * and passed through all the steps. This is how the command line option gets
402  * associated with the predicate function.
403  */
404 
/* CmdOptionHandler delegate signature - This is the call made to process the command
 * line option arguments after the header line has been read.
 *
 * Arguments, as used by the option handlers in this file:
 *   tests         - Array of run-time tests; the handler appends its delegates to it.
 *   maxFieldIndex - Largest zero-based field index referenced so far; raised by the
 *                   handler when it references a higher index.
 *   hasHeader     - Whether header processing is enabled; passed to parseFieldList.
 *   headerFields  - Header line fields; passed to parseFieldList.
 */
alias CmdOptionHandler = void delegate(ref FieldsPredicate[] tests, ref size_t maxFieldIndex,
                                       bool hasHeader, string[] headerFields);
410 
/* Captures a unary predicate and its command line option text in a CmdOptionHandler.
 * The returned delegate forwards to fieldUnaryOptionHandler when invoked.
 */
CmdOptionHandler makeFieldUnaryOptionHandler(FieldUnaryPredicate predicateFn, string option, string optionVal)
{
    return delegate void (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldUnaryOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
417 
/* Option handler for single-field tests (e.g. '--empty 4').
 *
 * Parses 'optionVal' as a field list, appends one FieldsPredicate delegate per field
 * to 'tests', and raises 'maxFieldIndex' to the largest zero-based field index seen.
 * Any exception raised during processing is rethrown with its message rewritten to
 * name the offending option and the expected syntax.
 */
void fieldUnaryOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldUnaryPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        /* The field list is iterated directly; no position counter is needed. */
        foreach (fieldIndex;
                 optionVal.parseFieldList!(size_t, Yes.convertToZeroBasedIndex)(hasHeader, headerFields))
        {
            tests ~= makeFieldUnaryDelegate(fn, fieldIndex);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
         e.msg = format("Invalid option: [--%s %s]. %s\n   Expected: '--%s <field>' or '--%s <field-list>'.",
                        option, optionVal, e.msg, option, option);
         throw e;
    }
}
439 
/* Captures a field-vs-number predicate and its command line option text in a
 * CmdOptionHandler. The returned delegate forwards to fieldVsNumberOptionHandler.
 */
CmdOptionHandler makeFieldVsNumberOptionHandler(FieldVsNumberPredicate predicateFn, string option, string optionVal)
{
    return delegate void (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsNumberOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
446 
/* Option handler for field-vs-number tests (e.g. '--lt 3:1000').
 *
 * 'optionVal' has the form '<field-list>:<number>'. The field list is parsed with
 * parseFieldList, the text after the separator is converted to a double, and one
 * FieldsPredicate delegate per field is appended to 'tests'. 'maxFieldIndex' is raised
 * to the largest zero-based field index seen. Any exception raised during processing
 * is rethrown with its message rewritten to name the option and the expected syntax.
 */
void fieldVsNumberOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsNumberPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    /* Builds the user-facing error text. The underlying error, if any, is inserted
     * between the option echo and the 'Expected' line.
     */
    auto formatErrorMsg(string option, string optionVal, string errorMessage="")
    {
        string optionalSpace = (errorMessage.length == 0) ? "" : " ";
        return format(
            "Invalid option: [--%s %s].%s%s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, optionalSpace, errorMessage, option, option);
    }

    try
    {
        auto optionValParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto fieldIndices = optionValParse.array;

        /* At least a separator plus one value character must follow the field list. */
        enforce(optionVal.length - optionValParse.consumed > 1, "No value after field list.");
        double value = optionVal[optionValParse.consumed + 1 .. $].to!double;

        foreach (fieldIndex; fieldIndices)
        {
            tests ~= makeFieldVsNumberDelegate(fn, fieldIndex, value);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = formatErrorMsg(option, optionVal, e.msg);
        throw e;
    }
}
484 
/* Captures a field-vs-string predicate and its command line option text in a
 * CmdOptionHandler. The returned delegate forwards to fieldVsStringOptionHandler.
 */
CmdOptionHandler makeFieldVsStringOptionHandler(FieldVsStringPredicate predicateFn, string option, string optionVal)
{
    return delegate void (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsStringOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
491 
/* Option handler for field-vs-string tests (e.g. '--str-eq 2:abc').
 *
 * 'optionVal' has the form '<field-list>:<string>'. One FieldsPredicate delegate per
 * field is appended to 'tests' and 'maxFieldIndex' is raised as needed. Any exception
 * is rethrown with its message rewritten to name the option and the expected syntax.
 */
void fieldVsStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsStringPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto parsedFieldList =
            parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (optionVal, hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto indices = parsedFieldList.array;

        /* At least a separator plus one value character must follow the field list. */
        enforce(optionVal.length - parsedFieldList.consumed > 1, "No value after field list.");

        const valueStart = parsedFieldList.consumed + 1;
        string value = optionVal[valueStart .. $].idup;

        foreach (idx; indices)
        {
            tests ~= makeFieldVsStringDelegate(fn, idx, value);
            if (idx > maxFieldIndex) maxFieldIndex = idx;
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
524 
/* Captures a case-insensitive field-vs-string predicate and its command line option
 * text in a CmdOptionHandler. The returned delegate forwards to
 * fieldVsIStringOptionHandler.
 */
CmdOptionHandler makeFieldVsIStringOptionHandler(FieldVsIStringPredicate predicateFn, string option, string optionVal)
{
    return delegate void (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsIStringOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
531 
/* Option handler for case-insensitive field-vs-string tests (e.g. '--istr-eq 2:abc').
 *
 * 'optionVal' has the form '<field-list>:<string>'. The string is converted to a
 * lower-cased dstring once; the predicates lower-case the field at run time and
 * compare against it. One FieldsPredicate delegate per field is appended to 'tests'
 * and 'maxFieldIndex' is raised as needed. Any exception is rethrown with its message
 * rewritten to name the option and the expected syntax.
 */
void fieldVsIStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsIStringPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto optionValParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto fieldIndices = optionValParse.array;

        /* At least a separator plus one value character must follow the field list. */
        enforce(optionVal.length - optionValParse.consumed > 1, "No value after field list.");

        /* Convert and lower-case once; the result is shared by every field's delegate. */
        dstring lowerValue = optionVal[optionValParse.consumed + 1 .. $].to!dstring.toLower;

        foreach (fieldIndex; fieldIndices)
        {
            tests ~= makeFieldVsIStringDelegate(fn, fieldIndex, lowerValue);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
566 
/* Captures a field-vs-regex predicate, its command line option text, and the
 * case-sensitivity setting in a CmdOptionHandler. The returned delegate forwards to
 * fieldVsRegexOptionHandler.
 */
CmdOptionHandler makeFieldVsRegexOptionHandler(FieldVsRegexPredicate predicateFn, string option, string optionVal, bool caseSensitive)
{
    return delegate void (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsRegexOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal, caseSensitive);
    };
}
573 
/* Option handler for field-vs-regex tests (e.g. '--regex 2:ab*c').
 *
 * 'optionVal' has the form '<field-list>:<regex>'. The regex is compiled once, with
 * the "i" flag when 'caseSensitive' is false, and shared by the delegates created for
 * each field. 'maxFieldIndex' is raised as needed. Regex compilation errors and all
 * other exceptions are rethrown with messages naming the option and expected syntax.
 */
void fieldVsRegexOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsRegexPredicate fn, string option, string optionVal, bool caseSensitive)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto parsedFieldList =
            parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (optionVal, hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto indices = parsedFieldList.array;

        /* At least a separator plus one pattern character must follow the field list. */
        enforce(optionVal.length - parsedFieldList.consumed > 1, "No value after field list.");

        immutable flags = caseSensitive ? "" : "i";
        auto pattern = optionVal[parsedFieldList.consumed + 1 .. $];
        Regex!char value = regex(pattern, flags);

        foreach (idx; indices)
        {
            tests ~= makeFieldVsRegexDelegate(fn, idx, value);
            if (idx > maxFieldIndex) maxFieldIndex = idx;
        }
    }
    catch (RegexException e)
    {
        e.msg = format(
            "[--%s %s]. Invalid regular expression: %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
616 
617 
/** Builds the deferred command line handler for a field-vs-field test option.
 *
 * The returned delegate captures the predicate, the option name, and the raw
 * option value. When invoked it forwards its own arguments plus the captured
 * values to fieldVsFieldOptionHandler.
 */
CmdOptionHandler makeFieldVsFieldOptionHandler(FieldVsFieldPredicate predicateFn, string option, string optionVal)
{
    return delegate (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsFieldOptionHandler(
            tests, maxFieldIndex, hasHeader, headerFields,
            predicateFn, option, optionVal);
    };
}
624 
/** Processes a field-vs-field test option of the form '<field1>:<field2>'.
 *
 * Two field specifications are parsed from optionVal. Each must resolve to
 * exactly one field, and the two fields must differ. On success a single test
 * delegate comparing the two fields is appended to tests and maxFieldIndex is
 * raised to cover both field indices.
 *
 * Any exception raised while processing is rethrown after its message has been
 * prefixed with the option text and suffixed with a usage hint.
 */
void fieldVsFieldOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsFieldPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        /* First field: parse only the leading field list, leaving the rest unconsumed. */
        auto optionValParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        auto fieldIndices1 = optionValParse.array;

        enforce(fieldIndices1.length != 0, "First field argument is empty.");
        enforce(fieldIndices1.length == 1, "First field argument references multiple fields.");

        /* A separator plus at least one more character must follow the first field. */
        enforce(optionVal.length - optionValParse.consumed > 1, "Second field argument is empty.");

        /* Second field: the remainder (after the separator) must be entirely a field list. */
        auto fieldIndices2 =
            optionVal[optionValParse.consumed + 1 .. $]
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, Yes.consumeEntireFieldListString)
            (hasHeader, headerFields)
            .array;

        enforce(fieldIndices2.length != 0, "Second field argument is empty.");
        enforce(fieldIndices2.length == 1, "Second field argument references multiple fields.");

        enforce(fieldIndices1[0] != fieldIndices2[0],
                format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

        tests ~= makeFieldVsFieldDelegate(fn, fieldIndices1[0], fieldIndices2[0]);
        maxFieldIndex = max(maxFieldIndex, fieldIndices1[0], fieldIndices2[0]);
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field1>:<field2>' where <field1> and <field2> are individual fields.",
            option, optionVal, e.msg, option);
        throw e;
    }
}
667 
/** Builds the deferred command line handler for a field-field-number test option.
 *
 * The returned delegate captures the predicate, the option name, and the raw
 * option value. When invoked it forwards its own arguments plus the captured
 * values to fieldFieldNumOptionHandler.
 */
CmdOptionHandler makeFieldFieldNumOptionHandler(FieldFieldNumPredicate predicateFn, string option, string optionVal)
{
    return delegate (ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldFieldNumOptionHandler(
            tests, maxFieldIndex, hasHeader, headerFields,
            predicateFn, option, optionVal);
    };
}
674 
/** Processes a field-field-number test option of the form '<field1>:<field2>:<num>'.
 *
 * Two field specifications and a trailing number are parsed from optionVal. Each
 * field specification must resolve to exactly one field, the two fields must
 * differ, and the trailing text must convert to a double. On success a single
 * test delegate is appended to tests and maxFieldIndex is raised to cover both
 * field indices.
 *
 * Any exception raised while processing (including number conversion failures)
 * is rethrown after its message has been prefixed with the option text and
 * suffixed with a usage hint.
 */
void fieldFieldNumOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldFieldNumPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        /* First field: parse only the leading field list, leaving the rest unconsumed. */
        auto optionValParse1 =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        auto fieldIndices1 = optionValParse1.array;

        enforce(fieldIndices1.length != 0, "First field argument is empty.");
        enforce(fieldIndices1.length == 1, "First field argument references multiple fields.");

        /* A separator plus at least one more character must follow the first field. */
        enforce(optionVal.length - optionValParse1.consumed > 1, "Second field argument is empty.");

        /* Second field: parsed from the text after the first separator. */
        auto optionValSegment2 = optionVal[optionValParse1.consumed + 1 .. $];
        auto optionValParse2 =
            optionValSegment2
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        auto fieldIndices2 = optionValParse2.array;

        enforce(fieldIndices2.length != 0, "Second field argument is empty.");
        enforce(fieldIndices2.length == 1, "Second field argument references multiple fields.");

        /* A separator plus at least one character of number text must follow the second field. */
        enforce(optionValSegment2.length - optionValParse2.consumed > 1, "Number argument is empty.");

        size_t field1 = fieldIndices1[0];
        size_t field2 = fieldIndices2[0];
        double value = optionValSegment2[optionValParse2.consumed + 1 .. $].to!double;

        enforce(field1 != field2,
                format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

        tests ~= makeFieldFieldNumDelegate(fn, field1, field2, value);
        maxFieldIndex = max(maxFieldIndex, field1, field2);
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field1>:<field2>:<num>' where <field1> and <field2> are individual fields.",
            option, optionVal, e.msg, option);
        throw e;
    }
}
724 
/** Command line options - This struct holds the results of command line option processing.
 * It also has a method, processArgs, that invokes command line arg processing.
 *
 * Besides the simple flags, the struct holds the list of test delegates built from the
 * test options and the largest field index any of those tests references.
 */
struct TsvFilterOptions
{
    import tsv_utils.common.utils : inputSourceRange, InputSourceRange, ReadHeader;

    string programName;
    InputSourceRange inputSources;   /// Input files
    FieldsPredicate[] tests;         /// Test delegates built from the command line test options
    size_t maxFieldIndex;            /// Derived from tests
    bool hasHeader = false;          /// --H|header
    bool invert = false;             /// --invert
    bool disjunct = false;           /// --or
    char delim = '\t';               /// --delimiter

    /* Processes the command line arguments in cmdArgs.
     *
     * Returns a tuple. First value is true if command line arguments were successfully
     * processed and execution should continue, or false if an error occurred or the user
     * asked for help. If false, the second value is the appropriate exit code (0 or 1).
     *
     * Returning true (execution continues) means args have been validated and the
     * tests array has been established.
     *
     * Exceptions raised during processing are caught here and reported on stderr; the
     * result is then (false, 1). On return cmdArgs has been truncated to its first
     * element unless a help/version request or an error ended processing early.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.algorithm : each;
        import std.array : split;
        import std.conv : to;
        import std.getopt;
        import std.path : baseName, stripExtension;
        import tsv_utils.common.getopt_inorder;
        import tsv_utils.common.utils : throwIfWindowsNewline;

        bool helpVerbose = false;        // --help-verbose
        bool helpOptions = false;        // --help-options
        bool helpFields = false;         // --help-fields
        bool versionWanted = false;      // --V|version

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        /* Command option handlers - One handler for each option. These conform to the
         * getopt required handler signature, and separate knowledge of the specific command
         * option text from the option processing.
         *
         * Each handler only records a deferred CmdOptionHandler in cmdLineTestOptions.
         * The recorded handlers are run later by fieldListArgProcessing (below), once
         * it is known whether header fields are available.
         */

        CmdOptionHandler[] cmdLineTestOptions;

        /* Unary field tests: empty / blank checks. */
        void handlerFldEmpty(string option, string value)    { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldEmpty,    option, value); }
        void handlerFldNotEmpty(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldNotEmpty, option, value); }
        void handlerFldBlank(string option, string value)    { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldBlank,    option, value); }
        void handlerFldNotBlank(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldNotBlank, option, value); }

        /* Unary field tests: numeric classification. */
        void handlerFldIsNumeric(string option, string value)  { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsNumeric,  option, value); }
        void handlerFldIsFinite(string option, string value)   { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsFinite,   option, value); }
        void handlerFldIsNaN(string option, string value)      { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsNaN,      option, value); }
        void handlerFldIsInfinity(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsInfinity, option, value); }

        /* Field vs number comparisons. */
        void handlerNumLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numLE, option, value); }
        void handlerNumLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numLT, option, value); }
        void handlerNumGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numGE, option, value); }
        void handlerNumGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numGT, option, value); }
        void handlerNumEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numEQ, option, value); }
        void handlerNumNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numNE, option, value); }

        /* Field vs string comparisons. */
        void handlerStrLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strLE, option, value); }
        void handlerStrLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strLT, option, value); }
        void handlerStrGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strGE, option, value); }
        void handlerStrGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strGT, option, value); }
        void handlerStrEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strEQ, option, value); }
        void handlerStrNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strNE, option, value); }

        /* Substring tests. */
        void handlerStrInFld(string option, string value)    { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strInFld,    option, value); }
        void handlerStrNotInFld(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strNotInFld, option, value); }

        /* Case-insensitive string tests (the 'istr' options). */
        void handlerIStrEQ(string option, string value)       { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrEQ,       option, value); }
        void handlerIStrNE(string option, string value)       { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrNE,       option, value); }
        void handlerIStrInFld(string option, string value)    { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrInFld,    option, value); }
        void handlerIStrNotInFld(string option, string value) { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrNotInFld, option, value); }

        /* Regular expression tests. The final argument is the caseSensitive flag; the
         * case-insensitive variants reuse the same predicates with the flag set to false.
         */
        void handlerRegexMatch(string option, string value)     { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexMatch,    option, value, true); }
        void handlerRegexNotMatch(string option, string value)  { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexNotMatch, option, value, true); }
        void handlerIRegexMatch(string option, string value)    { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexMatch,    option, value, false); }
        void handlerIRegexNotMatch(string option, string value) { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexNotMatch, option, value, false); }

        /* Character length tests. */
        void handlerCharLenLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenLE, option, value); }
        void handlerCharLenLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenLT, option, value); }
        void handlerCharLenGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenGE, option, value); }
        void handlerCharLenGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenGT, option, value); }
        void handlerCharLenEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenEQ, option, value); }
        void handlerCharLenNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenNE, option, value); }

        /* Byte length tests. */
        void handlerByteLenLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenLE, option, value); }
        void handlerByteLenLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenLT, option, value); }
        void handlerByteLenGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenGE, option, value); }
        void handlerByteLenGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenGT, option, value); }
        void handlerByteLenEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenEQ, option, value); }
        void handlerByteLenNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenNE, option, value); }

        /* Field vs field comparisons (the 'ff' options). */
        void handlerFFLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffLE, option, value); }
        void handlerFFLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffLT, option, value); }
        void handlerFFGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffGE, option, value); }
        void handlerFFGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffGT, option, value); }
        void handlerFFEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffEQ, option, value); }
        void handlerFFNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffNE, option, value); }

        /* Field vs field string comparisons. */
        void handlerFFStrEQ(string option, string value)  { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffStrEQ,  option, value); }
        void handlerFFStrNE(string option, string value)  { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffStrNE,  option, value); }
        void handlerFFIStrEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffIStrEQ, option, value); }
        void handlerFFIStrNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffIStrNE, option, value); }

        /* Field vs field difference tests, taking two fields and a number. */
        void handlerFFAbsDiffLE(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffAbsDiffLE, option, value); }
        void handlerFFAbsDiffGT(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffAbsDiffGT, option, value); }
        void handlerFFRelDiffLE(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffRelDiffLE, option, value); }
        void handlerFFRelDiffGT(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffRelDiffGT, option, value); }

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options

            /* Note: The std.getopt.config.caseSensitive / caseInsensitive entries are
             * positional. Each one changes the matching mode for the options listed
             * after it, which is what keeps the single-letter options 'V', 'H', and 'v'
             * distinct. Do not reorder entries without accounting for this.
             */
            auto r = getoptInorder(
                cmdArgs,
                "help-verbose",    "     Print full help.", &helpVerbose,
                "help-options",    "     Print the options list by itself.", &helpOptions,
                "help-fields",     "     Print help on specifying fields.", &helpFields,
                 std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                "H|header",        "     Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "or",              "     Evaluate tests as an OR rather than an AND.", &disjunct,
                std.getopt.config.caseSensitive,
                "v|invert",        "     Invert the filter, printing lines that do not match.", &invert,
                std.getopt.config.caseInsensitive,
                "d|delimiter",     "CHR  Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,

                "empty",           "<field-list>       True if FIELD is empty.", &handlerFldEmpty,
                "not-empty",       "<field-list>       True if FIELD is not empty.", &handlerFldNotEmpty,
                "blank",           "<field-list>       True if FIELD is empty or all whitespace.", &handlerFldBlank,
                "not-blank",       "<field-list>       True if FIELD contains a non-whitespace character.", &handlerFldNotBlank,

                "is-numeric",      "<field-list>       True if FIELD is interpretable as a number.", &handlerFldIsNumeric,
                "is-finite",       "<field-list>       True if FIELD is interpretable as a number and is not NaN or infinity.", &handlerFldIsFinite,
                "is-nan",          "<field-list>       True if FIELD is NaN.", &handlerFldIsNaN,
                "is-infinity",     "<field-list>       True if FIELD is infinity.", &handlerFldIsInfinity,

                "le",              "<field-list>:NUM   FIELD <= NUM (numeric).", &handlerNumLE,
                "lt",              "<field-list>:NUM   FIELD <  NUM (numeric).", &handlerNumLT,
                "ge",              "<field-list>:NUM   FIELD >= NUM (numeric).", &handlerNumGE,
                "gt",              "<field-list>:NUM   FIELD >  NUM (numeric).", &handlerNumGT,
                "eq",              "<field-list>:NUM   FIELD == NUM (numeric).", &handlerNumEQ,
                "ne",              "<field-list>:NUM   FIELD != NUM (numeric).", &handlerNumNE,

                "str-le",          "<field-list>:STR   FIELD <= STR (string).", &handlerStrLE,
                "str-lt",          "<field-list>:STR   FIELD <  STR (string).", &handlerStrLT,
                "str-ge",          "<field-list>:STR   FIELD >= STR (string).", &handlerStrGE,
                "str-gt",          "<field-list>:STR   FIELD >  STR (string).", &handlerStrGT,
                "str-eq",          "<field-list>:STR   FIELD == STR (string).", &handlerStrEQ,
                "istr-eq",         "<field-list>:STR   FIELD == STR (string, case-insensitive).", &handlerIStrEQ,
                "str-ne",          "<field-list>:STR   FIELD != STR (string).", &handlerStrNE,
                "istr-ne",         "<field-list>:STR   FIELD != STR (string, case-insensitive).", &handlerIStrNE,
                "str-in-fld",      "<field-list>:STR   FIELD contains STR (substring search).", &handlerStrInFld,
                "istr-in-fld",     "<field-list>:STR   FIELD contains STR (substring search, case-insensitive).", &handlerIStrInFld,
                "str-not-in-fld",  "<field-list>:STR   FIELD does not contain STR (substring search).", &handlerStrNotInFld,
                "istr-not-in-fld", "<field-list>:STR   FIELD does not contain STR (substring search, case-insensitive).", &handlerIStrNotInFld,

                "regex",           "<field-list>:REGEX   FIELD matches regular expression.", &handlerRegexMatch,
                "iregex",          "<field-list>:REGEX   FIELD matches regular expression, case-insensitive.", &handlerIRegexMatch,
                "not-regex",       "<field-list>:REGEX   FIELD does not match regular expression.", &handlerRegexNotMatch,
                "not-iregex",      "<field-list>:REGEX   FIELD does not match regular expression, case-insensitive.", &handlerIRegexNotMatch,

                "char-len-le",     "<field-list>:NUM   character-length(FIELD) <= NUM.", &handlerCharLenLE,
                "char-len-lt",     "<field-list>:NUM   character-length(FIELD) < NUM.", &handlerCharLenLT,
                "char-len-ge",     "<field-list>:NUM   character-length(FIELD) >= NUM.", &handlerCharLenGE,
                "char-len-gt",     "<field-list>:NUM   character-length(FIELD) > NUM.", &handlerCharLenGT,
                "char-len-eq",     "<field-list>:NUM   character-length(FIELD) == NUM.", &handlerCharLenEQ,
                "char-len-ne",     "<field-list>:NUM   character-length(FIELD) != NUM.", &handlerCharLenNE,

                "byte-len-le",     "<field-list>:NUM   byte-length(FIELD) <= NUM.", &handlerByteLenLE,
                "byte-len-lt",     "<field-list>:NUM   byte-length(FIELD) < NUM.", &handlerByteLenLT,
                "byte-len-ge",     "<field-list>:NUM   byte-length(FIELD) >= NUM.", &handlerByteLenGE,
                "byte-len-gt",     "<field-list>:NUM   byte-length(FIELD) > NUM.", &handlerByteLenGT,
                "byte-len-eq",     "<field-list>:NUM   byte-length(FIELD) == NUM.", &handlerByteLenEQ,
                "byte-len-ne",     "<field-list>:NUM   byte-length(FIELD) != NUM.", &handlerByteLenNE,

                "ff-le",           "FIELD1:FIELD2   FIELD1 <= FIELD2 (numeric).", &handlerFFLE,
                "ff-lt",           "FIELD1:FIELD2   FIELD1 <  FIELD2 (numeric).", &handlerFFLT,
                "ff-ge",           "FIELD1:FIELD2   FIELD1 >= FIELD2 (numeric).", &handlerFFGE,
                "ff-gt",           "FIELD1:FIELD2   FIELD1 >  FIELD2 (numeric).", &handlerFFGT,
                "ff-eq",           "FIELD1:FIELD2   FIELD1 == FIELD2 (numeric).", &handlerFFEQ,
                "ff-ne",           "FIELD1:FIELD2   FIELD1 != FIELD2 (numeric).", &handlerFFNE,
                "ff-str-eq",       "FIELD1:FIELD2   FIELD1 == FIELD2 (string).", &handlerFFStrEQ,
                "ff-istr-eq",      "FIELD1:FIELD2   FIELD1 == FIELD2 (string, case-insensitive).", &handlerFFIStrEQ,
                "ff-str-ne",       "FIELD1:FIELD2   FIELD1 != FIELD2 (string).", &handlerFFStrNE,
                "ff-istr-ne",      "FIELD1:FIELD2   FIELD1 != FIELD2 (string, case-insensitive).", &handlerFFIStrNE,

                "ff-absdiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) <= NUM", &handlerFFAbsDiffLE,
                "ff-absdiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2)  > NUM", &handlerFFAbsDiffGT,
                "ff-reldiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2)) <= NUM", &handlerFFRelDiffLE,
                "ff-reldiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2))  > NUM", &handlerFFRelDiffGT,
                );

            /* Both help texts are a bit long. In this case, for "regular" help, don't
             * print options, just the text. The text summarizes the options.
             *
             * Every help or version request below returns (false, 0): processing stops
             * and the exit code is zero.
             */
            if (r.helpWanted)
            {
                stdout.write(helpText);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (helpOptions)
            {
                defaultGetoptPrinter(helpTextOptions, r.options);
                return tuple(false, 0);
            }
            else if (helpFields)
            {
                import tsv_utils.common.fieldlist : fieldListHelpText ;
                writeln(fieldListHelpText);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-filter"));
                return tuple(false, 0);
            }

            /* Input files. Remaining command line args are files. If there are none,
             * the single path "-" is used.
             */
            string[] filepaths = (cmdArgs.length > 1) ? cmdArgs[1 .. $] : ["-"];
            cmdArgs.length = 1;    // Drop the file arguments from cmdArgs.

            string[] headerFields;

            /* FieldListArgProcessing encapsulates the field list processing. It is
             * called prior to reading the header line if headers are not being used,
             * and after if headers are being used.
             *
             * It runs every recorded option handler, which appends to 'tests' and
             * updates 'maxFieldIndex'.
             */
            void fieldListArgProcessing()
            {
                cmdLineTestOptions.each!(dg => dg(tests, maxFieldIndex, hasHeader, headerFields));
            }

            if (!hasHeader) fieldListArgProcessing();

            ReadHeader readHeader = hasHeader ? Yes.readHeader : No.readHeader;
            inputSources = inputSourceRange(filepaths, readHeader);

            if (hasHeader)
            {
                /* The header of the first input source supplies the field names
                 * passed to the option handlers.
                 */
                throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
                headerFields = inputSources.front.header.split(delim).to!(string[]);
                fieldListArgProcessing();
            }
        }
        catch (Exception e)
        {
            /* Report the failure on stderr and signal a non-zero exit code. */
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, e.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }
}
990 
/** tsvFilter reads each input source line by line, runs the configured tests
 * against the fields of every line, and writes the lines that pass.
 *
 * The tests are combined as an AND, or as an OR when cmdopt.disjunct is set, and
 * the combined result is negated when cmdopt.invert is set. When headers are in
 * use, the header of the first input source is written first.
 *
 * An exception is thrown if a line has fewer fields than the tests need, or if
 * a test throws while processing a line.
 */
void tsvFilter(ref TsvFilterOptions cmdopt)
{
    import std.algorithm : all, any, splitter;
    import std.range;
    import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, InputSourceRange,
        throwIfWindowsNewline;

    /* inputSources must be an InputSourceRange and include at least stdin. */
    static assert(is(typeof(cmdopt.inputSources) == InputSourceRange));
    assert(!cmdopt.inputSources.empty);

    /* Output is buffered, which helps performance on narrow files where most lines
     * are written. To keep output responsive when matches are rare, the counter
     * starts above the threshold so the first matched line is flushed right away,
     * and a match arriving after a long run of input lines without a flush is
     * flushed as well.
     */
    enum maxInputLinesWithoutBufferFlush = 1024;
    size_t linesSinceLastFlush = maxInputLinesWithoutBufferFlush + 1;

    auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout);

    /* The first header was read during command line argument processing. Write and
     * flush it immediately so downstream processes in a unix pipeline see it early.
     * This helps provide timely error messages.
     */
    if (cmdopt.hasHeader)
    {
        auto firstSource = cmdopt.inputSources.front;
        if (!firstSource.isHeaderEmpty)
        {
            bufferedOutput.appendln(firstSource.header);
            bufferedOutput.flush;
        }
    }

    /* Process each input file, one line at a time. */
    immutable size_t firstBodyLineNum = cmdopt.hasHeader ? 2 : 1;
    immutable long lastNeededFieldIndex = cast(long) cmdopt.maxFieldIndex;
    auto lineFields = new char[][](cmdopt.maxFieldIndex + 1);

    foreach (source; cmdopt.inputSources)
    {
        if (cmdopt.hasHeader) throwIfWindowsNewline(source.header, source.name, 1);

        foreach (lineNum, line; source.file.bufferedByLine.enumerate(firstBodyLineNum))
        {
            if (lineNum == 1) throwIfWindowsNewline(line, source.name, lineNum);

            /* Copy fields into lineFields, stopping once the last needed one is stored. */
            int lastStoredIndex = -1;
            foreach (fieldValue; line.splitter(cmdopt.delim))
            {
                if (lastStoredIndex == lastNeededFieldIndex) break;
                lastStoredIndex++;
                lineFields[lastStoredIndex] = fieldValue;
            }

            if (lastStoredIndex == -1)
            {
                /* Bug work-around. Currently empty lines are not handled properly by splitter.
                 *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
                 *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
                 * Work-around: Point to the line. It's an empty string.
                 */
                assert(line.length == 0);
                lastStoredIndex++;
                lineFields[lastStoredIndex] = line;
            }

            enforce(lastStoredIndex >= lastNeededFieldIndex,
                    format("Not enough fields in line. File: %s, Line: %s",
                           source.name, lineNum));

            /* Run the tests. Tests will fail (throw) if a field cannot be converted
             * to the expected type.
             */
            try
            {
                linesSinceLastFlush++;

                bool testsPassed;
                if (cmdopt.disjunct) testsPassed = cmdopt.tests.any!(test => test(lineFields));
                else testsPassed = cmdopt.tests.all!(test => test(lineFields));

                /* With --invert, a line is written exactly when the tests did not pass. */
                if (testsPassed != cmdopt.invert)
                {
                    const bool flushedByAppend = bufferedOutput.appendln(line);
                    const bool flushIsOverdue =
                        !flushedByAppend && linesSinceLastFlush > maxInputLinesWithoutBufferFlush;

                    if (flushIsOverdue) bufferedOutput.flush;
                    if (flushedByAppend || flushIsOverdue) linesSinceLastFlush = 0;
                }
            }
            catch (Exception e)
            {
                immutable headerHint =
                    (lineNum == 1) ? "\n  Is this a header line? Use --header to skip." : "";

                throw new Exception(
                    format("Could not process line or field: %s\n  File: %s Line: %s%s",
                           e.msg, source.name, lineNum, headerHint));
            }
        }
    }
}