1 /**
2 Command line tool that filters TSV files.
3 
4 This tool filters tab-delimited files based on numeric or string comparisons
5 against specific fields. See the helpText string for details.
6 
7 Copyright (c) 2015-2021, eBay Inc.
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.tsv_filter;
13 
import std.algorithm : canFind, equal, findSplit, max, min;
import std.conv : to;
import std.exception : enforce;
import std.format : format;
import std.math : abs, isFinite, isInfinity, isNaN;
import std.range;
import std.regex;
import std.stdio;
import std.string : isNumeric;
import std.typecons;
import std.uni: asLowerCase, toLower, byGrapheme;
25 
26 /* The program has two main parts, command line arg processing and processing the input
27  * files. Much of the work is in command line arg processing. This sets up the tests run
28  * against each input line. The tests are an array of delegates (closures) run against the
29  * fields in the line. The tests are based on command line arguments, of which there is
30  * a lengthy set, one for each test.
31  */
32 
/* Embeds default D runtime options in the executable via the 'rt_options' symbol. The
 * one option set, "gcopt=cleanup:none", is a garbage collector setting (presumably it
 * skips the GC cleanup pass at program exit - TODO confirm against the druntime docs).
 * The declaration is only compiled when the compiler front-end version is 2.085 or later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
34 
/** Program entry point.
 *
 * Parses the command line into a TsvFilterOptions instance and, if argument processing
 * says to continue, runs tsvFilterCommand. Exceptions thrown by tsvFilterCommand are
 * reported on stderr, prefixed with the program name.
 *
 * Returns: The exit code supplied by argument processing when it asks to stop,
 * 1 if tsvFilterCommand threw an Exception, 0 otherwise.
 */
int main(string[] cmdArgs)
{
    /* Merge coverage reports when built with DMD in code coverage mode. */
    version(D_Coverage)
    {
        version(DigitalMars)
        {
            import core.runtime : dmd_coverSetMerge;
            dmd_coverSetMerge(true);
        }
    }

    TsvFilterOptions cmdopt;
    const argsResult = cmdopt.processArgs(cmdArgs);

    /* Element 0 says whether to continue; element 1 is the exit code to use if not. */
    if (!argsResult[0])
    {
        return argsResult[1];
    }

    version(LDC_Profile)
    {
        import ldc.profile : resetAll;
        resetAll();
    }

    int exitCode = 0;
    try
    {
        tsvFilterCommand(cmdopt);
    }
    catch (Exception exc)
    {
        stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg);
        exitCode = 1;
    }
    return exitCode;
}
63 
/* Brief help text: synopsis, global options, and a summary of the test operators.
 * The examples only use operators that appear in the matching 'Syntax' lines.
 */
immutable helpText = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter tab-delimited files for matching lines via comparison tests against
individual fields. Use '--help-verbose' for a more detailed description.

Fields are specified using field number or field name. Field names require
that the input file has a header line. Use '--help-fields' for details.

Global options:
  --help-verbose      Print full help.
  --help-options      Print the options list by itself.
  --help-fields       Print help on specifying fields.
  --V|version         Print version information and exit.
  --H|header          Treat the first line of each file as a header.
  --or                Evaluate tests as an OR rather than an AND clause.
  --v|invert          Invert the filter, printing lines that do not match.
  --c|count           Print only a count of the matched lines.
  --d|delimiter CHR   Field delimiter. Default: TAB.
  --label STR         Rather than filter, mark each record as passing the
                         filter or not. STR is the header, ignored if there
                         is no header line.
  --label-values STR1:STR2
                      The pass/no-pass values used by '--label'. Defaults
                         to '1' and '0'.
  --line-buffered     Immediately output every matched line.

Operators:
* Test if a field is empty (no characters) or blank (empty or whitespace only).
  Syntax:  --empty|not-empty|blank|not-blank  FIELD
  Example: --empty name               # True if the 'name' field is empty

* Test if a field is numeric, finite, NaN, or infinity
  Syntax:  --is-numeric|is-finite|is-nan|is-infinity FIELD
  Example: --is-numeric 5 --gt 5:100  # Ensure field 5 is numeric before --gt test.

* Compare a field to a number (integer or float)
  Syntax:  --eq|ne|lt|le|gt|ge  FIELD:NUM
  Example: --lt size:1000 --gt weight:0.5  # ('size' < 1000) and ('weight' > 0.5)

* Compare a field to a string
  Syntax:  --str-eq|str-ne|istr-eq|istr-ne  FIELD:STR
  Example: --str-eq color:red         # True if 'color' field is "red"

* Test if a field contains a string (substring search)
  Syntax:  --str-in-fld|str-not-in-fld|istr-in-fld|istr-not-in-fld  FIELD:STR
  Example: --str-in-fld color:dark    # True if 'color' field contains "dark"

* Test if a field matches a regular expression.
  Syntax:  --regex|iregex|not-regex|not-iregex  FIELD:REGEX
  Example: --regex '3:ab*c'     # True if field 3 contains "ac", "abc", "abbc", etc.

* Test a field's character or byte length
  Syntax:  --char-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
           --byte-len-[le|lt|ge|gt|eq|ne] FIELD:NUM
  Example: --char-len-lt 2:10   # True if field 2 is less than 10 characters long.
           --byte-len-gt 2:10   # True if field 2 is greater than 10 bytes long.

* Field to field comparisons - Similar to field vs literal comparisons, but field vs field.
  Syntax:  --ff-eq|ff-ne|ff-lt|ff-le|ff-gt|ff-ge  FIELD1:FIELD2
           --ff-str-eq|ff-str-ne|ff-istr-eq|ff-istr-ne  FIELD1:FIELD2
  Example: --ff-eq 2:4          # True if fields 2 and 4 are numerically equivalent
           --ff-str-eq 2:4      # True if fields 2 and 4 are the same strings

* Field to field difference comparisons - Absolute and relative difference
  Syntax:  --ff-absdiff-le|ff-absdiff-gt FIELD1:FIELD2:NUM
           --ff-reldiff-le|ff-reldiff-gt FIELD1:FIELD2:NUM
  Example: --ff-absdiff-le 1:3:0.25   # True if abs(field1 - field3) <= 0.25

EOS";
134 
/* Detailed help text: description, usage examples, list of available tests, and notes
 * on run-time behavior. Ends with an 'Options:' heading and no option list of its own.
 */
immutable helpTextVerbose = q"EOS
Synopsis: tsv-filter [options] [file...]

Filter lines of tab-delimited files via comparison tests against fields.
Multiple tests can be specified, by default they are evaluated as an AND
clause. Lines satisfying the tests are written to standard output.

Typical test syntax is '--op field:value', where 'op' is an operator,
'field' is either a field name or a field number, and 'value' is the
comparison basis. For example, '--lt length:500' tests if the 'length'
field is less than 500. A more complete example:

  tsv-filter --header --gt length:50 --lt length:100 --le width:200 data.tsv

This outputs all lines from file data.tsv where the 'length' field is
greater than 50 and less than 100, and the 'width' field is less than or
equal to 200. The header line is also output.

Field numbers can also be used to identify fields, and must be used when
the input file doesn't have a header line. For example:

  tsv-filter --gt 1:50 --lt 1:100 --le 2:200 data.tsv

Field lists can be used to specify multiple fields at once. For example:

  tsv-filter --not-blank 1-10 --str-ne 1,2,5:'--' data.tsv

tests that fields 1-10 are not blank and fields 1,2,5 are not "--".

Wildcarded field names can also be used to specify multiple fields. The
following finds lines where any field with a name ending in '_id' is empty:

  tsv-filter -H --or --empty '*_id'

Use '--help-fields' for details on using field names.

Tests available include:
  * Test if a field is empty (no characters) or blank (empty or whitespace only).
  * Test if a field is interpretable as a number, a finite number, NaN, or Infinity.
  * Compare a field to a number - Numeric equality and relational tests.
  * Compare a field to a string - String equality and relational tests.
  * Test if a field matches a regular expression. Case sensitive or insensitive.
  * Test if a field contains a string. Sub-string search, case sensitive or insensitive.
  * Test a field's character or byte length.
  * Field to field comparisons - Similar to the other tests, except comparing
    one field to another in the same line.

As an alternative to filtering, records can be marked to indicate if they meet
the filter criteria or not. For example, the following will add a field to each
record indicating if the 'Color' field is a primary color.

  tsv-filter -H --or --str-eq Color:Red --str-eq Color:Yellow --str-eq Color:Blue \
  --label IsPrimaryColor data.tsv

Values default to '1' and '0' and can be changed using '--label-values'. The
header name passed to '--label' is ignored if headers are not being used.

Details:
  * The run is aborted if there are not enough fields in an input line.
  * Numeric tests will fail and abort the run if a field cannot be interpreted as a
    number. This includes fields with no text. To avoid this use '--is-numeric' or
    '--is-finite' prior to the numeric test. For example, '--is-numeric 5 --gt 5:100'
    ensures field 5 is numeric before running the --gt test.
  * Regular expression syntax is defined by the D programming language. They follow
    common conventions (perl, python, etc.). Most common forms work as expected.
  * Output is buffered by default to improve performance. Use '--line-buffered' to
    have each matched line immediately written out.

Options:
EOS";
205 
/* Short header text consisting of the synopsis line and an 'Options:' heading. It holds
 * no option descriptions itself (presumably the generated options list is printed after
 * it for '--help-options' - TODO confirm against the code that uses it).
 */
immutable helpTextOptions = q"EOS
Synopsis: tsv-filter [options] [file...]

Options:
EOS";
211 
/* The next blocks of code define the structure of the boolean tests run against input lines.
 * This includes function and delegate (closure) signatures, creation mechanisms, option
 * handlers, etc. Command line arg processing builds the test structure.
*/
216 
/* FieldsPredicate delegate signature - Each input line is run against a set of boolean
 * tests. Each test is a 'FieldsPredicate'. A FieldsPredicate is a delegate (closure)
 * containing all info about the test except the field values of the line being tested.
 * These delegates are created as part of command line arg processing. The wrapped data
 * includes operation, field indexes, literal values, etc. At run-time the delegate is
 * passed one argument, the split input line, and returns the boolean result of its test
 * for that line.
 */
alias FieldsPredicate = bool delegate(const char[][] fields);
225 
/* FieldsPredicate function signatures - These aliases represent the different function
 * signatures used in FieldsPredicate delegates. Each alias has a corresponding 'make'
 * function. The 'make' function takes a real predicate function and closure args and
 * returns a FieldsPredicate delegate. Predicates types are:
 *
 * - FieldUnaryPredicate - Test based on a single field. (e.g. --empty 4)
 * - FieldVsNumberPredicate - Test based on a field index (used to get the field value)
 *   and a fixed numeric value. For example, field 2 less than 100 (--lt 2:100).
 * - FieldVsStringPredicate - Test based on a field and a string. (e.g. --str-eq 2:abc)
 * - FieldVsIStringPredicate - Case-insensitive test based on a field and a string.
 *   (e.g. --istr-eq 2:abc)
 * - FieldVsRegexPredicate - Test based on a field and a regex. (e.g. --regex '2:ab*c')
 * - FieldVsFieldPredicate - Test based on two fields. (e.g. --ff-le 2:4).
 * - FieldFieldNumPredicate - Test based on two fields and a fixed numeric value.
 *   (e.g. --ff-absdiff-le 2:4:0.25)
 *
 * An actual FieldsPredicate takes the fields from the line and the closure args and
 * runs the test. For example, a function testing if a field is less than a specific
 * value would pull the specified field from the fields array, convert the string to
 * a number, then run the less-than test.
 */
alias FieldUnaryPredicate    = bool function(const char[][] fields, size_t index);
alias FieldVsNumberPredicate = bool function(const char[][] fields, size_t index, double value);
alias FieldVsStringPredicate = bool function(const char[][] fields, size_t index, string value);
alias FieldVsIStringPredicate = bool function(const char[][] fields, size_t index, dstring value);
alias FieldVsRegexPredicate  = bool function(const char[][] fields, size_t index, Regex!char value);
alias FieldVsFieldPredicate  = bool function(const char[][] fields, size_t index1, size_t index2);
alias FieldFieldNumPredicate  = bool function(const char[][] fields, size_t index1, size_t index2, double value);
252 
/* Delegate factories - Each 'make' function binds a predicate function together with its
 * fixed arguments (field indexes and/or comparison value) and returns a FieldsPredicate
 * closure that only needs the split input line to run the test.
 */

/* Binds a single-field predicate to its field index. */
FieldsPredicate makeFieldUnaryDelegate(FieldUnaryPredicate fn, size_t index)
{
    return delegate bool(const char[][] fields) { return fn(fields, index); };
}

/* Binds a field-vs-number predicate to its field index and numeric value. */
FieldsPredicate makeFieldVsNumberDelegate(FieldVsNumberPredicate fn, size_t index, double value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-string predicate to its field index and string value. */
FieldsPredicate makeFieldVsStringDelegate(FieldVsStringPredicate fn, size_t index, string value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a case-insensitive field-vs-string predicate to its field index and dstring value. */
FieldsPredicate makeFieldVsIStringDelegate(FieldVsIStringPredicate fn, size_t index, dstring value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-regex predicate to its field index and compiled regex. */
FieldsPredicate makeFieldVsRegexDelegate(FieldVsRegexPredicate fn, size_t index, Regex!char value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index, value); };
}

/* Binds a field-vs-field predicate to its two field indexes. */
FieldsPredicate makeFieldVsFieldDelegate(FieldVsFieldPredicate fn, size_t index1, size_t index2)
{
    return delegate bool(const char[][] fields) { return fn(fields, index1, index2); };
}

/* Binds a two-field-plus-number predicate to its two field indexes and numeric value. */
FieldsPredicate makeFieldFieldNumDelegate(FieldFieldNumPredicate fn, size_t index1, size_t index2, double value)
{
    return delegate bool(const char[][] fields) { return fn(fields, index1, index2, value); };
}
287 
288 /* Predicate functions - These are the actual functions used in a FieldsPredicate. They
289  * are a direct reflection of the operators available via command line args. Each matches
290  * one of the FieldsPredicate function aliases defined above.
291  */
/* Empty/blank tests. 'Empty' is a zero-length field; 'blank' is a field matching the
 * regex `^\s*$` (nothing but whitespace, including the empty field).
 */
bool fldEmpty(const char[][] fields, size_t index)
{
    return fields[index].empty;
}

bool fldNotEmpty(const char[][] fields, size_t index)
{
    return !fields[index].empty;
}

bool fldBlank(const char[][] fields, size_t index)
{
    return !matchFirst(fields[index], ctRegex!`^\s*$`).empty;
}

bool fldNotBlank(const char[][] fields, size_t index)
{
    return matchFirst(fields[index], ctRegex!`^\s*$`).empty;
}
296 
/* Numeric classification tests. Each one first requires the field to pass isNumeric;
 * the finite/NaN/infinity tests then convert the field to a double and classify it.
 */
bool fldIsNumeric(const char[][] fields, size_t index)
{
    return isNumeric(fields[index]);
}

bool fldIsFinite(const char[][] fields, size_t index)
{
    if (!isNumeric(fields[index])) return false;
    return isFinite(fields[index].to!double);
}

bool fldIsNaN(const char[][] fields, size_t index)
{
    if (!isNumeric(fields[index])) return false;
    return isNaN(fields[index].to!double);
}

bool fldIsInfinity(const char[][] fields, size_t index)
{
    if (!isNumeric(fields[index])) return false;
    return isInfinity(fields[index].to!double);
}
301 
/* Field vs number tests. The field is converted to a double on every call; the conversion
 * throws if the field text is not a valid number.
 */
bool numLE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue <= val;
}

bool numLT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue < val;
}

bool numGE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue >= val;
}

bool numGT(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue > val;
}

bool numEQ(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue == val;
}

bool numNE(const char[][] fields, size_t index, double val)
{
    immutable double fieldValue = fields[index].to!double;
    return fieldValue != val;
}
308 
/* Field vs string tests. Relational tests use D's built-in array comparison; the
 * 'InFld' tests are substring searches of the field for 'val'.
 */
bool strLE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field <= val;
}

bool strLT(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field < val;
}

bool strGE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field >= val;
}

bool strGT(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field > val;
}

bool strEQ(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field == val;
}

bool strNE(const char[][] fields, size_t index, string val)
{
    const field = fields[index];
    return field != val;
}

bool strInFld(const char[][] fields, size_t index, string val)
{
    return canFind(fields[index], val);
}

bool strNotInFld(const char[][] fields, size_t index, string val)
{
    return !canFind(fields[index], val);
}
317 
/* Case-insensitive string tests. Only the field is lower-cased here; 'val' is compared
 * as given, so callers are expected to pass an already lower-cased value (the comment on
 * fieldVsIStringOptionHandler says it lower-cases the command line argument).
 */
bool istrEQ(const char[][] fields, size_t index, dstring val)
{
    return equal(asLowerCase(fields[index]), val);
}

bool istrNE(const char[][] fields, size_t index, dstring val)
{
    return !equal(asLowerCase(fields[index]), val);
}

bool istrInFld(const char[][] fields, size_t index, dstring val)
{
    return canFind(asLowerCase(fields[index]), val);
}

bool istrNotInFld(const char[][] fields, size_t index, dstring val)
{
    return !canFind(asLowerCase(fields[index]), val);
}
324 
/* Regex tests. Case sensitivity is a property of the compiled regex passed in, so the
 * same two predicates serve both the case-sensitive and case-insensitive operators.
 */
bool regexMatch(const char[][] fields, size_t index, Regex!char val)
{
    return !matchFirst(fields[index], val).empty;
}

bool regexNotMatch(const char[][] fields, size_t index, Regex!char val)
{
    return matchFirst(fields[index], val).empty;
}
330 
/* Character length tests. Length is the number of graphemes in the field, counted by
 * walking the field with byGrapheme.
 */
bool charLenLE(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars <= val;
}

bool charLenLT(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars < val;
}

bool charLenGE(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars >= val;
}

bool charLenGT(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars > val;
}

bool charLenEQ(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars == val;
}

bool charLenNE(const char[][] fields, size_t index, double val)
{
    immutable size_t numChars = fields[index].byGrapheme.walkLength;
    return numChars != val;
}
337 
/* Byte length tests. Length is the field's array length, i.e. its number of char
 * code units.
 */
bool byteLenLE(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes <= val;
}

bool byteLenLT(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes < val;
}

bool byteLenGE(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes >= val;
}

bool byteLenGT(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes > val;
}

bool byteLenEQ(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes == val;
}

bool byteLenNE(const char[][] fields, size_t index, double val)
{
    immutable size_t numBytes = fields[index].length;
    return numBytes != val;
}
344 
/* Field vs field tests. Numeric versions convert both fields to doubles (left field
 * first); string versions compare the raw field text, with the 'IStr' versions comparing
 * lower-cased forms of both fields.
 */
bool ffLE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs <= rhs;
}

bool ffLT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs < rhs;
}

bool ffGE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs >= rhs;
}

bool ffGT(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs > rhs;
}

bool ffEQ(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs == rhs;
}

bool ffNE(const char[][] fields, size_t index1, size_t index2)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return lhs != rhs;
}

bool ffStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = fields[index1];
    const rhs = fields[index2];
    return lhs == rhs;
}

bool ffStrNE(const char[][] fields, size_t index1, size_t index2)
{
    const lhs = fields[index1];
    const rhs = fields[index2];
    return lhs != rhs;
}

bool ffIStrEQ(const char[][] fields, size_t index1, size_t index2)
{
    return fields[index1].asLowerCase.equal(fields[index2].asLowerCase);
}

bool ffIStrNE(const char[][] fields, size_t index1, size_t index2)
{
    return !fields[index1].asLowerCase.equal(fields[index2].asLowerCase);
}
361 
/* Absolute difference: |v1 - v2|. */
auto AbsDiff(double v1, double v2)
{
    return abs(v1 - v2);
}

/* Relative difference: |v1 - v2| divided by the smaller of |v1| and |v2|. */
auto RelDiff(double v1, double v2)
{
    immutable difference = abs(v1 - v2);
    immutable smallerMagnitude = min(abs(v1), abs(v2));
    return difference / smallerMagnitude;
}

/* Field vs field difference tests. Both fields are converted to doubles (index1 first),
 * and their absolute or relative difference is compared against 'value'.
 */
bool ffAbsDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return AbsDiff(lhs, rhs) <= value;
}

bool ffAbsDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return AbsDiff(lhs, rhs) > value;
}

bool ffRelDiffLE(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return RelDiff(lhs, rhs) <= value;
}

bool ffRelDiffGT(const char[][] fields, size_t index1, size_t index2, double value)
{
    immutable double lhs = fields[index1].to!double;
    immutable double rhs = fields[index2].to!double;
    return RelDiff(lhs, rhs) > value;
}
381 
382 /* Command line option handlers - There is a command line option handler for each
383  * predicate type. That is, one each for FieldUnaryPredicate, FieldVsNumberPredicate,
384  * etc. Option handlers are passed the tests array, the predicate function, and the
385  * command line option arguments. A FieldsPredicate delegate is created and appended to
386  * the tests array. An exception is thrown if errors are detected while processing the
387  * option, the error text is intended for the end user.
388  *
 * All the option handlers have similar functionality, differing in option processing and
 * error message generation. fieldVsNumberOptionHandler is described as an example. It
 * handles command options such as '--le 3:1000', which tests field 3 for values less
 * than or equal to 1000. It is passed the tests array, the 'numLE' predicate function
 * used for the test, and the string "3:1000" representing the option value. It is also
 * passed the header line from the first input file and an indication of whether header
 * processing is enabled (--H|header). parseFieldList (fieldlist module) is used to parse
 * the field-list component of the option ("3" in the example). The comparison value
 * ("1000") is converted to a double. These are wrapped in a FieldsPredicate delegate
 * which is added to the tests array. An error is signaled if the option string is invalid.
399  *
400  * During processing, fields indexes are converted from one-based to zero-based. As an
401  * optimization, the maximum field index is also tracked. This allows early termination of
402  * line splitting.
403  *
 * The header line from the input file is not available when std.getopt processes the
 * command line option. The processing described above must be deferred. This is done
 * using a 'CmdOptionHandler' delegate. There is a 'make' function for every command line
 * option handler that creates these. These are created during std.getopt processing.
 * They are run when the header line becomes available.
409  *
 * The final setup for the '--le' (numeric less-than-or-equal) operator is as follows:
 *   - Function 'handlerNumLE' (in TsvFilterOptions.processArgs) is associated with the
 *     command line option "--le <val>". When called by std.getopt it creates an option
 *     handler delegate via 'makeFieldVsNumberOptionHandler'. This is appended to an
 *     array of delegates.
 *   - 'fieldVsNumberOptionHandler' is invoked via the delegate after the header line
 *     becomes available (in TsvFilterOptions.processArgs). If args are valid,
 *     'makeFieldVsNumberDelegate' is used to create a delegate invoking the 'numLE'
 *     predicate function. This delegate is added to the set of run-time tests.
419  *
420  * Note that in the above setup the 'numLE' predicate is specified in 'handlerNumLE'
421  * and passed through all the steps. This is how the command line option gets
422  * associated with the predicate function.
423  */
424 
/* CmdOptionHandler delegate signature - This is the call made to process the command
 * line option arguments after the header line has been read.
 *
 * Arguments, as used by the option handlers in this file:
 *   - tests - Array the handler appends its new FieldsPredicate delegates to.
 *   - maxFieldIndex - Raised by the handler when it sees a larger field index.
 *   - hasHeader, headerFields - Passed through to parseFieldList when parsing the
 *     field-list part of the option value.
 */
alias CmdOptionHandler = void delegate(ref FieldsPredicate[] tests, ref size_t maxFieldIndex,
                                       bool hasHeader, string[] headerFields);
430 
/* Creates the deferred option handler for a single-field test. The returned delegate
 * captures the predicate, option name, and option value, and forwards them to
 * fieldUnaryOptionHandler when it is eventually invoked.
 */
CmdOptionHandler makeFieldUnaryOptionHandler(FieldUnaryPredicate predicateFn, string option, string optionVal)
{
    return delegate void(
        ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldUnaryOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
437 
/* Option handler for single-field tests (FieldUnaryPredicate).
 *
 * The option value is parsed as a field list. For every field index produced, a
 * FieldsPredicate delegate wrapping 'fn' is appended to 'tests' and 'maxFieldIndex' is
 * raised if the index is larger.
 *
 * Params:
 *   tests = Array of run-time tests; new delegates are appended.
 *   maxFieldIndex = Largest zero-based field index seen so far; updated in place.
 *   hasHeader = Passed to parseFieldList along with headerFields.
 *   headerFields = Header fields passed to parseFieldList.
 *   fn = Predicate function to run against each listed field.
 *   option = Option name, used in the error message.
 *   optionVal = Option value, the field list to parse.
 *
 * Throws: Any Exception raised while parsing is rethrown with its message rewritten
 * into user-facing "Invalid option" text.
 */
void fieldUnaryOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldUnaryPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        /* The field list is iterated directly; no position counter is needed. */
        foreach (fieldIndex;
                 optionVal
                 .parseFieldList!(size_t, Yes.convertToZeroBasedIndex)(hasHeader, headerFields))
        {
            tests ~= makeFieldUnaryDelegate(fn, fieldIndex);
            maxFieldIndex = max(maxFieldIndex, fieldIndex);
        }
    }
    catch (Exception e)
    {
         e.msg = format("Invalid option: [--%s %s]. %s\n   Expected: '--%s <field>' or '--%s <field-list>'.",
                        option, optionVal, e.msg, option, option);
         throw e;
    }
}
459 
/* Creates the deferred option handler for a field-vs-number test. The returned delegate
 * captures the predicate, option name, and option value, and forwards them to
 * fieldVsNumberOptionHandler when it is eventually invoked.
 */
CmdOptionHandler makeFieldVsNumberOptionHandler(FieldVsNumberPredicate predicateFn, string option, string optionVal)
{
    return delegate void(
        ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsNumberOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
466 
/* Option handler for field-vs-number tests (FieldVsNumberPredicate).
 *
 * The option value has the form '<field-list>:<val>'. The field list is parsed first;
 * the text after the single separator character that follows it is converted to a
 * double. For every field index, a FieldsPredicate delegate wrapping 'fn' and the
 * number is appended to 'tests', and 'maxFieldIndex' is raised if the index is larger.
 *
 * Params:
 *   tests = Array of run-time tests; new delegates are appended.
 *   maxFieldIndex = Largest zero-based field index seen so far; updated in place.
 *   hasHeader = Passed to parseFieldList along with headerFields.
 *   headerFields = Header fields passed to parseFieldList.
 *   fn = Predicate function to run against each listed field.
 *   option = Option name, used in the error message.
 *   optionVal = Option value, '<field-list>:<val>'.
 *
 * Throws: Any Exception raised (bad field list, missing value, non-numeric value) is
 * rethrown with its message rewritten into user-facing "Invalid option" text.
 */
void fieldVsNumberOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsNumberPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    /* Builds the user-facing message. The detail text is separated from the option
     * echo by a space only when there is detail text to show.
     */
    auto formatErrorMsg(string option, string optionVal, string errorMessage="")
    {
        string optionalSpace = (errorMessage.length == 0) ? "" : " ";
        return format(
            "Invalid option: [--%s %s].%s%s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a number.",
            option, optionVal, optionalSpace, errorMessage, option, option);
    }

    try
    {
        auto optionValParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* The field list is materialized before 'consumed' is read, as in the other handlers. */
        auto fieldIndices = optionValParse.array;

        /* At least one character must remain after the separator. */
        enforce(optionVal.length - optionValParse.consumed > 1, "No value after field list.");
        double value = optionVal[optionValParse.consumed + 1 .. $].to!double;

        foreach (fieldIndex; fieldIndices)
        {
            tests ~= makeFieldVsNumberDelegate(fn, fieldIndex, value);
            maxFieldIndex = (fieldIndex > maxFieldIndex) ? fieldIndex : maxFieldIndex;
        }
    }
    catch (Exception e)
    {
        e.msg = formatErrorMsg(option, optionVal, e.msg);
        throw e;
    }
}
504 
/* Creates the deferred option handler for a field-vs-string test. The returned delegate
 * captures the predicate, option name, and option value, and forwards them to
 * fieldVsStringOptionHandler when it is eventually invoked.
 */
CmdOptionHandler makeFieldVsStringOptionHandler(FieldVsStringPredicate predicateFn, string option, string optionVal)
{
    return delegate void(
        ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsStringOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
511 
/* Option handler for field-vs-string tests (FieldVsStringPredicate).
 *
 * Parses '<field-list>:<val>' from the option value. The text after the single separator
 * character following the field list is the comparison string. One FieldsPredicate
 * delegate per field index is appended to 'tests', and 'maxFieldIndex' is raised when a
 * larger index is seen. Any Exception is rethrown with a user-facing message.
 */
void fieldVsStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsStringPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto parsedFields =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto indices = parsedFields.array;
        enforce(optionVal.length - parsedFields.consumed > 1, "No value after field list.");

        immutable size_t valueStart = parsedFields.consumed + 1;
        string comparisonText = optionVal[valueStart .. $].idup;

        foreach (idx; indices)
        {
            tests ~= makeFieldVsStringDelegate(fn, idx, comparisonText);
            if (idx > maxFieldIndex) maxFieldIndex = idx;
        }
    }
    catch (Exception exc)
    {
        exc.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, exc.msg, option, option);
        throw exc;
    }
}
544 
/* Creates the deferred option handler for a case-insensitive field-vs-string test. The
 * returned delegate captures the predicate, option name, and option value, and forwards
 * them to fieldVsIStringOptionHandler when it is eventually invoked.
 */
CmdOptionHandler makeFieldVsIStringOptionHandler(FieldVsIStringPredicate predicateFn, string option, string optionVal)
{
    return delegate void(
        ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsIStringOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal);
    };
}
551 
/* Option handler for case-insensitive field-vs-string tests (FieldVsIStringPredicate).
 *
 * Parses '<field-list>:<val>' from the option value. The comparison string is converted
 * to a dstring and lower-cased once, on the assumption that the predicates compare it
 * against lower-cased field values. One FieldsPredicate delegate per field index is
 * appended to 'tests', and 'maxFieldIndex' is raised when a larger index is seen.
 *
 * Params:
 *   tests = Array of run-time tests; new delegates are appended.
 *   maxFieldIndex = Largest zero-based field index seen so far; updated in place.
 *   hasHeader = Passed to parseFieldList along with headerFields.
 *   headerFields = Header fields passed to parseFieldList.
 *   fn = Predicate function to run against each listed field.
 *   option = Option name, used in the error message.
 *   optionVal = Option value, '<field-list>:<val>'.
 *
 * Throws: Any Exception raised is rethrown with a user-facing message.
 */
void fieldVsIStringOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsIStringPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto optionValParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto fieldIndices = optionValParse.array;
        enforce(optionVal.length - optionValParse.consumed > 1, "No value after field list.");

        /* Convert and lower-case once; the result is the same for every field. */
        immutable dstring value = optionVal[optionValParse.consumed + 1 .. $].to!dstring.toLower;

        foreach (fieldIndex; fieldIndices)
        {
            tests ~= makeFieldVsIStringDelegate(fn, fieldIndex, value);
            maxFieldIndex = (fieldIndex > maxFieldIndex) ? fieldIndex : maxFieldIndex;
        }
    }
    catch (Exception e)
    {
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a string.",
            option, optionVal, e.msg, option, option);
        throw e;
    }
}
586 
/* Creates the deferred option handler for a field-vs-regex test. The returned delegate
 * captures the predicate, option name, option value, and case-sensitivity flag, and
 * forwards them to fieldVsRegexOptionHandler when it is eventually invoked.
 */
CmdOptionHandler makeFieldVsRegexOptionHandler(FieldVsRegexPredicate predicateFn, string option, string optionVal, bool caseSensitive)
{
    return delegate void(
        ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields)
    {
        fieldVsRegexOptionHandler(tests, maxFieldIndex, hasHeader, headerFields, predicateFn, option, optionVal, caseSensitive);
    };
}
593 
/* Option handler for field-vs-regex tests (FieldVsRegexPredicate).
 *
 * Parses '<field-list>:<val>' from the option value and compiles the text after the
 * separator as a regular expression, adding the "i" flag when 'caseSensitive' is false.
 * One FieldsPredicate delegate per field index is appended to 'tests', and
 * 'maxFieldIndex' is raised when a larger index is seen. Regex compilation errors and
 * other Exceptions are rethrown with separate user-facing messages.
 */
void fieldVsRegexOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsRegexPredicate fn, string option, string optionVal, bool caseSensitive)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto parsedFields =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* Materialize the field list before reading 'consumed'. */
        auto indices = parsedFields.array;
        enforce(optionVal.length - parsedFields.consumed > 1, "No value after field list.");

        string flags = "i";
        if (caseSensitive) flags = "";

        immutable size_t patternStart = parsedFields.consumed + 1;
        Regex!char compiled = regex(optionVal[patternStart .. $], flags);

        foreach (idx; indices)
        {
            tests ~= makeFieldVsRegexDelegate(fn, idx, compiled);
            if (idx > maxFieldIndex) maxFieldIndex = idx;
        }
    }
    catch (RegexException exc)
    {
        /* Must stay ahead of the general handler so regex errors get their own text. */
        exc.msg = format(
            "[--%s %s]. Invalid regular expression: %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, exc.msg, option, option);
        throw exc;
    }
    catch (Exception exc)
    {
        exc.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field>:<val>' or '--%s <field-list>:<val>' where <val> is a regular expression.",
            option, optionVal, exc.msg, option, option);
        throw exc;
    }
}
636 
637 
/** Builds the deferred handler for a '<field1>:<field2>' test option.
 *
 * The predicate, option name and option value are captured in the returned delegate.
 * When the delegate is invoked it forwards those captured values, together with the
 * arguments it receives, to fieldVsFieldOptionHandler.
 */
CmdOptionHandler makeFieldVsFieldOptionHandler(FieldVsFieldPredicate predicateFn, string option, string optionVal)
{
    CmdOptionHandler deferredHandler =
        (ref FieldsPredicate[] testList, ref size_t maxIndex, bool withHeader, string[] headerNames)
        {
            fieldVsFieldOptionHandler(
                testList, maxIndex, withHeader, headerNames,
                predicateFn, option, optionVal);
        };

    return deferredHandler;
}
644 
/** Processes a '<field1>:<field2>' option value.
 *
 * Two field lists are parsed from optionVal: the first from the front of the string,
 * the second from the text after the separator that follows the first (and required to
 * consume the rest of the string). Each must name exactly one field, and the two fields
 * must differ. On success a single test delegate is appended to tests and maxFieldIndex
 * is raised to cover both fields.
 *
 * Any exception is rethrown with its message rewritten to include the option, its
 * value, and the expected syntax.
 */
void fieldVsFieldOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldVsFieldPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        auto firstParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        /* The field list is materialized before 'consumed' is read. */
        auto firstFields = firstParse.array;

        enforce(firstFields.length != 0, "First field argument is empty.");
        enforce(firstFields.length == 1, "First field argument references multiple fields.");
        enforce(optionVal.length - firstParse.consumed > 1, " Second field argument is empty.");

        /* Everything after the separator must be the second field. */
        string secondFieldText = optionVal[firstParse.consumed + 1 .. $];
        auto secondFields =
            secondFieldText
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, Yes.consumeEntireFieldListString)
            (hasHeader, headerFields)
            .array;

        enforce(secondFields.length != 0, "Second field argument is empty.");
        enforce(secondFields.length == 1, "Second field argument references multiple fields.");

        size_t leftField = firstFields[0];
        size_t rightField = secondFields[0];

        enforce(leftField != rightField,
                format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

        tests ~= makeFieldVsFieldDelegate(fn, leftField, rightField);
        maxFieldIndex = max(maxFieldIndex, leftField, rightField);
    }
    catch (Exception e)
    {
        string detail = e.msg;
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field1>:<field2>' where <field1> and <field2> are individual fields.",
            option, optionVal, detail, option);
        throw e;
    }
}
687 
/** Builds the deferred handler for a '<field1>:<field2>:<num>' test option.
 *
 * The predicate, option name and option value are captured in the returned delegate.
 * When the delegate is invoked it forwards those captured values, together with the
 * arguments it receives, to fieldFieldNumOptionHandler.
 */
CmdOptionHandler makeFieldFieldNumOptionHandler(FieldFieldNumPredicate predicateFn, string option, string optionVal)
{
    CmdOptionHandler deferredHandler =
        (ref FieldsPredicate[] testList, ref size_t maxIndex, bool withHeader, string[] headerNames)
        {
            fieldFieldNumOptionHandler(
                testList, maxIndex, withHeader, headerNames,
                predicateFn, option, optionVal);
        };

    return deferredHandler;
}
694 
/** Processes a '<field1>:<field2>:<num>' option value.
 *
 * Two single-field lists are parsed in sequence from optionVal, each followed by a
 * separator character. The text after the second separator is converted to a double.
 * The two fields must differ. On success a single test delegate is appended to tests
 * and maxFieldIndex is raised to cover both fields.
 *
 * Any exception (including a failed numeric conversion) is rethrown with its message
 * rewritten to include the option, its value, and the expected syntax.
 */
void fieldFieldNumOptionHandler(
    ref FieldsPredicate[] tests, ref size_t maxFieldIndex, bool hasHeader, string[] headerFields,
    FieldFieldNumPredicate fn, string option, string optionVal)
{
    import tsv_utils.common.fieldlist;

    try
    {
        /* First field: parsed from the front of the option value. */
        auto firstParse =
            optionVal
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        auto firstFields = firstParse.array;

        enforce(firstFields.length != 0, "First field argument is empty.");
        enforce(firstFields.length == 1, "First field argument references multiple fields.");
        enforce(optionVal.length - firstParse.consumed > 1, " Second field argument is empty.");

        /* Second field: parsed from the text after the first separator. */
        auto remainder = optionVal[firstParse.consumed + 1 .. $];
        auto secondParse =
            remainder
            .parseFieldList!(size_t, Yes.convertToZeroBasedIndex, No.allowFieldNumZero, No.consumeEntireFieldListString)
            (hasHeader, headerFields);

        auto secondFields = secondParse.array;

        enforce(secondFields.length != 0, "Second field argument is empty.");
        enforce(secondFields.length == 1, "Second field argument references multiple fields.");
        enforce(remainder.length - secondParse.consumed > 1, "Number argument is empty.");

        size_t leftField = firstFields[0];
        size_t rightField = secondFields[0];

        /* Number: the text after the second separator. Converted before the
         * same-field check, so a conversion failure is reported first.
         */
        auto numberText = remainder[secondParse.consumed + 1 .. $];
        double value = numberText.to!double;

        enforce(leftField != rightField,
                format("Invalid option: '--%s %s'. Field1 and field2 must be different fields", option, optionVal));

        tests ~= makeFieldFieldNumDelegate(fn, leftField, rightField, value);
        maxFieldIndex = max(maxFieldIndex, leftField, rightField);
    }
    catch (Exception e)
    {
        string detail = e.msg;
        e.msg = format(
            "[--%s %s]. %s\n   Expected: '--%s <field1>:<field2>:<num>' where <field1> and <field2> are individual fields.",
            option, optionVal, detail, option);
        throw e;
    }
}
744 
/** Command line options - This struct holds the results of command line option processing.
 * It also has a method, processArgs, that invokes command line arg processing.
 *
 * Besides the plain option values, processArgs fills in `tests` (one delegate per
 * field named in each test option), `maxFieldIndex`, and `inputSources`.
 */
struct TsvFilterOptions
{
    import tsv_utils.common.utils : inputSourceRange, InputSourceRange, ReadHeader;

    string programName;
    InputSourceRange inputSources;      /// Input files
    FieldsPredicate[] tests;            /// Derived from the test options (--eq, --regex, etc.)
    size_t maxFieldIndex = 0;           /// Derived from tests
    bool hasHeader = false;             /// --H|header
    bool invert = false;                /// --v|invert
    bool disjunct = false;              /// --or
    bool countMatches = false;          /// --c|count
    char delim = '\t';                  /// --d|delimiter
    string label;                       /// --label
    bool labelValuesOptionUsed = false; /// --label-values
    bool lineBuffered = false;          /// --line-buffered
    bool isLabeling = false;            /// Derived. True if --label or --label-values was used.
    string trueLabel = "1";             /// Derived. First value given to --label-values.
    string falseLabel = "0";            /// Derived. Second value given to --label-values.

    /* Returns a tuple. First value is true if command line arguments were successfully
     * processed and execution should continue, or false if an error occurred or the user
     * asked for help. If false, the second value is the appropriate exit code (0 or 1).
     *
     * Returning true (execution continues) means args have been validated and the
     * tests array has been established.
     *
     * Exceptions raised during processing are caught here and reported on stderr,
     * prefixed with the program name. On the non-help path cmdArgs is truncated to
     * its first element after the file paths have been extracted from it.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.algorithm : each;
        import std.array : split;
        import std.conv : to;
        import std.getopt;
        import std.path : baseName, stripExtension;
        import tsv_utils.common.getopt_inorder;
        import tsv_utils.common.utils : throwIfWindowsNewline;

        bool helpVerbose = false;        // --help-verbose
        bool helpOptions = false;        // --help-options
        bool helpFields = false;         // --help-fields
        bool versionWanted = false;      // --V|version

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        /* Command option handlers - One handler for each option. These conform to the
         * getopt required handler signature, and separate knowledge of the specific
         * command option text from the option processing.
         *
         * The handlers do not parse the option value themselves. Each one appends a
         * deferred handler to cmdLineTestOptions. The deferred handlers are run later
         * by fieldListArgProcessing, after the header line (if any) has been read.
         */

        CmdOptionHandler[] cmdLineTestOptions;

        /* Unary tests: <field-list> */
        void handlerFldEmpty(string option, string value)    { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldEmpty,    option, value); }
        void handlerFldNotEmpty(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldNotEmpty, option, value); }
        void handlerFldBlank(string option, string value)    { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldBlank,    option, value); }
        void handlerFldNotBlank(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldNotBlank, option, value); }

        void handlerFldIsNumeric(string option, string value)  { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsNumeric,  option, value); }
        void handlerFldIsFinite(string option, string value)   { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsFinite,   option, value); }
        void handlerFldIsNaN(string option, string value)      { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsNaN,      option, value); }
        void handlerFldIsInfinity(string option, string value) { cmdLineTestOptions ~= makeFieldUnaryOptionHandler(&fldIsInfinity, option, value); }

        /* Numeric comparisons: <field-list>:NUM */
        void handlerNumLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numLE, option, value); }
        void handlerNumLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numLT, option, value); }
        void handlerNumGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numGE, option, value); }
        void handlerNumGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numGT, option, value); }
        void handlerNumEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numEQ, option, value); }
        void handlerNumNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&numNE, option, value); }

        /* String comparisons: <field-list>:STR */
        void handlerStrLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strLE, option, value); }
        void handlerStrLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strLT, option, value); }
        void handlerStrGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strGE, option, value); }
        void handlerStrGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strGT, option, value); }
        void handlerStrEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strEQ, option, value); }
        void handlerStrNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strNE, option, value); }

        void handlerStrInFld(string option, string value)    { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strInFld,    option, value); }
        void handlerStrNotInFld(string option, string value) { cmdLineTestOptions ~= makeFieldVsStringOptionHandler(&strNotInFld, option, value); }

        /* Case-insensitive string comparisons: <field-list>:STR */
        void handlerIStrEQ(string option, string value)       { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrEQ,       option, value); }
        void handlerIStrNE(string option, string value)       { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrNE,       option, value); }
        void handlerIStrInFld(string option, string value)    { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrInFld,    option, value); }
        void handlerIStrNotInFld(string option, string value) { cmdLineTestOptions ~= makeFieldVsIStringOptionHandler(&istrNotInFld, option, value); }

        /* Regular expressions: <field-list>:REGEX. The case-insensitive variants use
         * the same predicates; only the final (caseSensitive) argument differs.
         */
        void handlerRegexMatch(string option, string value)     { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexMatch,    option, value, true); }
        void handlerRegexNotMatch(string option, string value)  { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexNotMatch, option, value, true); }
        void handlerIRegexMatch(string option, string value)    { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexMatch,    option, value, false); }
        void handlerIRegexNotMatch(string option, string value) { cmdLineTestOptions ~= makeFieldVsRegexOptionHandler(&regexNotMatch, option, value, false); }

        /* Character length comparisons: <field-list>:NUM */
        void handlerCharLenLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenLE, option, value); }
        void handlerCharLenLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenLT, option, value); }
        void handlerCharLenGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenGE, option, value); }
        void handlerCharLenGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenGT, option, value); }
        void handlerCharLenEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenEQ, option, value); }
        void handlerCharLenNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&charLenNE, option, value); }

        /* Byte length comparisons: <field-list>:NUM */
        void handlerByteLenLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenLE, option, value); }
        void handlerByteLenLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenLT, option, value); }
        void handlerByteLenGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenGE, option, value); }
        void handlerByteLenGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenGT, option, value); }
        void handlerByteLenEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenEQ, option, value); }
        void handlerByteLenNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsNumberOptionHandler(&byteLenNE, option, value); }

        /* Field vs field comparisons: FIELD1:FIELD2 */
        void handlerFFLE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffLE, option, value); }
        void handlerFFLT(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffLT, option, value); }
        void handlerFFGE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffGE, option, value); }
        void handlerFFGT(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffGT, option, value); }
        void handlerFFEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffEQ, option, value); }
        void handlerFFNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffNE, option, value); }

        void handlerFFStrEQ(string option, string value)  { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffStrEQ,  option, value); }
        void handlerFFStrNE(string option, string value)  { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffStrNE,  option, value); }
        void handlerFFIStrEQ(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffIStrEQ, option, value); }
        void handlerFFIStrNE(string option, string value) { cmdLineTestOptions ~= makeFieldVsFieldOptionHandler(&ffIStrNE, option, value); }

        /* Field vs field difference tests: FIELD1:FIELD2:NUM */
        void handlerFFAbsDiffLE(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffAbsDiffLE, option, value); }
        void handlerFFAbsDiffGT(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffAbsDiffGT, option, value); }
        void handlerFFRelDiffLE(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffRelDiffLE, option, value); }
        void handlerFFRelDiffGT(string option, string value) { cmdLineTestOptions ~= makeFieldFieldNumOptionHandler(&ffRelDiffGT, option, value); }

        /* The handleLabelValuesOption is different from the other handlers in that it is
         * not generic. Instead it simply parses and validates the argument passed to the
         * --label-values option. If the option is valid, it populates the `trueLabel`
         * and `falseLabel` member variables. Otherwise an exception is thrown.
         */
        void handleLabelValuesOption(string option, string optionVal)
        {
            /* Split at the first colon: [0] is the text before it, [2] the text after. */
            immutable valSplit = optionVal.findSplit(":");

            /* Valid only if a colon was found, the remainder has no further colon, and
             * the two values differ.
             *
             * NOTE(review): "niether" in the message below is a misspelling of "neither".
             * It is part of the program's output, so it is left untouched here.
             */
            enforce(valSplit && !valSplit[2].canFind(":") && valSplit[0] != valSplit[2],
                    format("Invalid option: '--%s %s'.\n" ~
                           "  Expected: '--%s STR1:STR2'. STR1 and STR2 must be different strings.\n" ~
                           "  The colon (':') is required, niether string can contain a colon.",
                           option, optionVal, option));

            labelValuesOptionUsed = true;
            trueLabel = valSplit[0];
            falseLabel = valSplit[2];
        }

        try
        {
            arraySep = ",";    // Use comma to separate values in command line options

            /* The std.getopt.config.caseSensitive / caseInsensitive entries in the list
             * below switch matching mode for the options that follow them. The upper
             * case short options (-V, -H) and lower case -v are registered while
             * case-sensitive matching is on.
             */
            auto r = getoptInorder(
                cmdArgs,
                "help-verbose",    "     Print full help.", &helpVerbose,
                "help-options",    "     Print the options list by itself.", &helpOptions,
                "help-fields",     "     Print help on specifying fields.", &helpFields,
                 std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                "H|header",        "     Treat the first line of each file as a header.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "or",              "     Evaluate tests as an OR rather than an AND.", &disjunct,
                std.getopt.config.caseSensitive,
                "v|invert",        "     Invert the filter, printing lines that do not match.", &invert,
                std.getopt.config.caseInsensitive,
                "c|count",         "     Print only a count of the matched lines, excluding the header.", &countMatches,
                "d|delimiter",     "CHR  Field delimiter. Default: TAB. (Single byte UTF-8 characters only.)", &delim,

                "label",
                "STR  Do not filter. Instead, mark each record as passing the filter or not. STR is the header, ignored if there is no header line.",
                &label,

                "label-values",
                                   "STR1:STR2   The pass/no-pass values used by '--label'. Defaults to '1' and '0'.",
                &handleLabelValuesOption,

                "line-buffered",   "     Immediately output every matched line.", &lineBuffered,

                "empty",           "<field-list>       True if FIELD is empty.", &handlerFldEmpty,
                "not-empty",       "<field-list>       True if FIELD is not empty.", &handlerFldNotEmpty,
                "blank",           "<field-list>       True if FIELD is empty or all whitespace.", &handlerFldBlank,
                "not-blank",       "<field-list>       True if FIELD contains a non-whitespace character.", &handlerFldNotBlank,

                "is-numeric",      "<field-list>       True if FIELD is interpretable as a number.", &handlerFldIsNumeric,
                "is-finite",       "<field-list>       True if FIELD is interpretable as a number and is not NaN or infinity.", &handlerFldIsFinite,
                "is-nan",          "<field-list>       True if FIELD is NaN.", &handlerFldIsNaN,
                "is-infinity",     "<field-list>       True if FIELD is infinity.", &handlerFldIsInfinity,

                "le",              "<field-list>:NUM   FIELD <= NUM (numeric).", &handlerNumLE,
                "lt",              "<field-list>:NUM   FIELD <  NUM (numeric).", &handlerNumLT,
                "ge",              "<field-list>:NUM   FIELD >= NUM (numeric).", &handlerNumGE,
                "gt",              "<field-list>:NUM   FIELD >  NUM (numeric).", &handlerNumGT,
                "eq",              "<field-list>:NUM   FIELD == NUM (numeric).", &handlerNumEQ,
                "ne",              "<field-list>:NUM   FIELD != NUM (numeric).", &handlerNumNE,

                "str-le",          "<field-list>:STR   FIELD <= STR (string).", &handlerStrLE,
                "str-lt",          "<field-list>:STR   FIELD <  STR (string).", &handlerStrLT,
                "str-ge",          "<field-list>:STR   FIELD >= STR (string).", &handlerStrGE,
                "str-gt",          "<field-list>:STR   FIELD >  STR (string).", &handlerStrGT,
                "str-eq",          "<field-list>:STR   FIELD == STR (string).", &handlerStrEQ,
                "istr-eq",         "<field-list>:STR   FIELD == STR (string, case-insensitive).", &handlerIStrEQ,
                "str-ne",          "<field-list>:STR   FIELD != STR (string).", &handlerStrNE,
                "istr-ne",         "<field-list>:STR   FIELD != STR (string, case-insensitive).", &handlerIStrNE,
                "str-in-fld",      "<field-list>:STR   FIELD contains STR (substring search).", &handlerStrInFld,
                "istr-in-fld",     "<field-list>:STR   FIELD contains STR (substring search, case-insensitive).", &handlerIStrInFld,
                "str-not-in-fld",  "<field-list>:STR   FIELD does not contain STR (substring search).", &handlerStrNotInFld,
                "istr-not-in-fld", "<field-list>:STR   FIELD does not contain STR (substring search, case-insensitive).", &handlerIStrNotInFld,

                "regex",           "<field-list>:REGEX   FIELD matches regular expression.", &handlerRegexMatch,
                "iregex",          "<field-list>:REGEX   FIELD matches regular expression, case-insensitive.", &handlerIRegexMatch,
                "not-regex",       "<field-list>:REGEX   FIELD does not match regular expression.", &handlerRegexNotMatch,
                "not-iregex",      "<field-list>:REGEX   FIELD does not match regular expression, case-insensitive.", &handlerIRegexNotMatch,

                "char-len-le",     "<field-list>:NUM   character-length(FIELD) <= NUM.", &handlerCharLenLE,
                "char-len-lt",     "<field-list>:NUM   character-length(FIELD) < NUM.", &handlerCharLenLT,
                "char-len-ge",     "<field-list>:NUM   character-length(FIELD) >= NUM.", &handlerCharLenGE,
                "char-len-gt",     "<field-list>:NUM   character-length(FIELD) > NUM.", &handlerCharLenGT,
                "char-len-eq",     "<field-list>:NUM   character-length(FIELD) == NUM.", &handlerCharLenEQ,
                "char-len-ne",     "<field-list>:NUM   character-length(FIELD) != NUM.", &handlerCharLenNE,

                "byte-len-le",     "<field-list>:NUM   byte-length(FIELD) <= NUM.", &handlerByteLenLE,
                "byte-len-lt",     "<field-list>:NUM   byte-length(FIELD) < NUM.", &handlerByteLenLT,
                "byte-len-ge",     "<field-list>:NUM   byte-length(FIELD) >= NUM.", &handlerByteLenGE,
                "byte-len-gt",     "<field-list>:NUM   byte-length(FIELD) > NUM.", &handlerByteLenGT,
                "byte-len-eq",     "<field-list>:NUM   byte-length(FIELD) == NUM.", &handlerByteLenEQ,
                "byte-len-ne",     "<field-list>:NUM   byte-length(FIELD) != NUM.", &handlerByteLenNE,

                "ff-le",           "FIELD1:FIELD2   FIELD1 <= FIELD2 (numeric).", &handlerFFLE,
                "ff-lt",           "FIELD1:FIELD2   FIELD1 <  FIELD2 (numeric).", &handlerFFLT,
                "ff-ge",           "FIELD1:FIELD2   FIELD1 >= FIELD2 (numeric).", &handlerFFGE,
                "ff-gt",           "FIELD1:FIELD2   FIELD1 >  FIELD2 (numeric).", &handlerFFGT,
                "ff-eq",           "FIELD1:FIELD2   FIELD1 == FIELD2 (numeric).", &handlerFFEQ,
                "ff-ne",           "FIELD1:FIELD2   FIELD1 != FIELD2 (numeric).", &handlerFFNE,
                "ff-str-eq",       "FIELD1:FIELD2   FIELD1 == FIELD2 (string).", &handlerFFStrEQ,
                "ff-istr-eq",      "FIELD1:FIELD2   FIELD1 == FIELD2 (string, case-insensitive).", &handlerFFIStrEQ,
                "ff-str-ne",       "FIELD1:FIELD2   FIELD1 != FIELD2 (string).", &handlerFFStrNE,
                "ff-istr-ne",      "FIELD1:FIELD2   FIELD1 != FIELD2 (string, case-insensitive).", &handlerFFIStrNE,

                "ff-absdiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) <= NUM", &handlerFFAbsDiffLE,
                "ff-absdiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) >  NUM", &handlerFFAbsDiffGT,
                "ff-reldiff-le",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2)) <= NUM", &handlerFFRelDiffLE,
                "ff-reldiff-gt",   "FIELD1:FIELD2:NUM   abs(FIELD1 - FIELD2) / min(abs(FIELD1), abs(FIELD2)) >  NUM", &handlerFFRelDiffGT,
                );

            /* Both help texts are a bit long. In this case, for "regular" help, don't
             * print options, just the text. The text summarizes the options.
             *
             * Every help/version branch returns (false, 0): stop, with a success exit code.
             */
            if (r.helpWanted)
            {
                stdout.write(helpText);
                return tuple(false, 0);
            }
            else if (helpVerbose)
            {
                defaultGetoptPrinter(helpTextVerbose, r.options);
                return tuple(false, 0);
            }
            else if (helpOptions)
            {
                defaultGetoptPrinter(helpTextOptions, r.options);
                return tuple(false, 0);
            }
            else if (helpFields)
            {
                import tsv_utils.common.fieldlist : fieldListHelpText ;
                writeln(fieldListHelpText);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("tsv-filter"));
                return tuple(false, 0);
            }

            /* Input files. Remaining command line args are files. If none are given,
             * the single path "-" is used (presumably standard input - confirm against
             * inputSourceRange).
             */
            string[] filepaths = (cmdArgs.length > 1) ? cmdArgs[1 .. $] : ["-"];
            cmdArgs.length = 1;

            /* Validations and derivations. Currently all are related to label mode. */
            if (!label.empty || labelValuesOptionUsed)
            {
                /* --label-values alone is accepted only when there is no header line. */
                enforce(!label.empty || !hasHeader,
                        "--label is required when using --label-values and --H|header.");

                isLabeling = true;
            }

            enforce (!isLabeling || !countMatches,
                     format("--c|count cannot be used with --label or --label-values."));

            string[] headerFields;

            /* FieldListArgProcessing encapsulates the field list processing. It is
             * called prior to reading the header line if headers are not being used,
             * and after if headers are being used.
             *
             * It runs every deferred test option handler, which populates `tests` and
             * updates `maxFieldIndex`. `headerFields` is empty unless a header was read.
             */
            void fieldListArgProcessing()
            {
                cmdLineTestOptions.each!(dg => dg(tests, maxFieldIndex, hasHeader, headerFields));
            }

            if (!hasHeader) fieldListArgProcessing();

            ReadHeader readHeader = hasHeader ? Yes.readHeader : No.readHeader;
            inputSources = inputSourceRange(filepaths, readHeader);

            if (hasHeader)
            {
                /* Header fields come from the first input source, split on the delimiter. */
                throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
                headerFields = inputSources.front.header.split(delim).to!(string[]);
                fieldListArgProcessing();
            }
        }
        catch (Exception e)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, e.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }
}
1060 
/** Selects which variant of tsvFilter is instantiated. */
enum FilterMode
{
    filter,   /// Default mode, used when neither counting nor labeling.
    count,    /// Used when --c|count is given.
    label,    /// Used when --label or --label-values is given.
}
1062 
/** Runs tsvFilter in the mode implied by the command line options.
 *
 * Counting takes precedence over labeling; plain filtering is used when neither
 * applies.
 */
void tsvFilterCommand(ref TsvFilterOptions cmdopt)
{
    const FilterMode selectedMode =
        cmdopt.countMatches ? FilterMode.count :
        cmdopt.isLabeling   ? FilterMode.label :
                              FilterMode.filter;

    /* The mode is a template argument, so each case needs its own instantiation. */
    final switch (selectedMode)
    {
        case FilterMode.count:
            tsvFilter!(FilterMode.count)(cmdopt);
            break;

        case FilterMode.label:
            tsvFilter!(FilterMode.label)(cmdopt);
            break;

        case FilterMode.filter:
            tsvFilter!(FilterMode.filter)(cmdopt);
            break;
    }
}
1069 
/** tsvFilter processes the input files and runs the tests.
 *
 * Every data line of every input source in `cmdopt.inputSources` is split on
 * `cmdopt.delim`, and the leading `cmdopt.maxFieldIndex + 1` fields are passed to
 * the test delegates in `cmdopt.tests`. The tests are combined with OR when
 * `cmdopt.disjunct` is set and with AND otherwise. The combined result is negated
 * when `cmdopt.invert` is set.
 *
 * The `mode` template parameter fixes what is done with the result:
 *   - `FilterMode.filter`: lines that pass are written to stdout.
 *   - `FilterMode.label`: every line is written to stdout, followed by the
 *     delimiter and either `cmdopt.trueLabel` or `cmdopt.falseLabel`.
 *   - `FilterMode.count`: nothing is written per line; the number of passing
 *     lines is written to stdout once all input has been read.
 *
 * Params:
 *   mode   = Compile-time selection of the output behavior (see above).
 *   cmdopt = Processed command line options. `cmdopt.inputSources` must be
 *            non-empty. When `cmdopt.hasHeader` is set, the header of the first
 *            input is written before any data line (filter and label modes only).
 *
 * Throws:
 *   Exception if a line has fewer fields than the tests require, or if a test
 *   (or writing the result) throws while a line is being processed. In the
 *   latter case the exception is re-thrown with the file name and line number
 *   added to the message, and the original exception is attached as the next
 *   exception in the chain.
 */
void tsvFilter(FilterMode mode)(ref TsvFilterOptions cmdopt)
{
    import std.algorithm : all, any, splitter;
    import std.range;
    import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange, InputSourceRange,
        LineBuffered, throwIfWindowsNewline;

    /* The mode must agree with the options it was selected from. */
    static if (mode != FilterMode.count) assert(!cmdopt.countMatches);
    static if (mode != FilterMode.label) assert(!cmdopt.isLabeling);

    /* inputSources must be an InputSourceRange and include at least stdin. */
    assert(!cmdopt.inputSources.empty);
    static assert(is(typeof(cmdopt.inputSources) == InputSourceRange));

    /* BufferedOutputRange improves performance on narrow files with high percentages of
     * writes.
     *
     * Count mode writes nothing per line, so it needs no output range and never
     * requests line buffered input.
     */
    static if (mode == FilterMode.count)
    {
        immutable LineBuffered isLineBuffered = No.lineBuffered;
    }
    else
    {
        immutable LineBuffered isLineBuffered =
            cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered;

        auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, isLineBuffered);
    }

    static if (mode == FilterMode.count) size_t matchedLines = 0;

    /* First header is read during command line argument processing. Immediately
     * flush it so subsequent processes in a unix command pipeline see it early.
     * This helps provide timely error messages.
     */
    static if (mode != FilterMode.count)
    {
        if (cmdopt.hasHeader && !cmdopt.inputSources.front.isHeaderEmpty)
        {
            auto inputStream = cmdopt.inputSources.front;

            static if (mode == FilterMode.label)
            {
                /* In label mode the header gets an extra column named by cmdopt.label. */
                bufferedOutput.appendln(inputStream.header, cmdopt.delim, cmdopt.label);
            }
            else
            {
                bufferedOutput.appendln(inputStream.header);
            }

            bufferedOutput.flush;
        }
    }

    /* Number of leading fields needed from each line. Zero when there are no tests,
     * in which case no fields are extracted.
     */
    immutable size_t fieldIndexEnd = cmdopt.tests.empty ? 0 : cmdopt.maxFieldIndex + 1;

    /* Process each input file, one line at a time. */
    immutable size_t fileBodyStartLine = cmdopt.hasHeader ? 2 : 1;

    /* Reused for every line. Entries are slices of the current line, not copies. */
    auto lineFields = new char[][](fieldIndexEnd);

    foreach (inputStream; cmdopt.inputSources)
    {
        if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1);

        foreach (lineNum, line; inputStream.file.bufferedByLine(isLineBuffered).enumerate(fileBodyStartLine))
        {
            /* Only reached when there is no header line: with a header, numbering of
             * the body starts at 2 and line 1 was checked above.
             */
            if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum);

            /* Copy the needed number of fields to the fields array. */
            size_t fieldIndex = 0;

            foreach (fieldValue; line.splitter(cmdopt.delim).take(fieldIndexEnd))
            {
                lineFields[fieldIndex] = fieldValue;
                fieldIndex++;
            }

            if (fieldIndex == 0 && fieldIndexEnd != 0)
            {
                assert(line.length == 0);
                /* Bug work-around. Currently empty lines are not handled properly by splitter.
                 *   Bug: https://issues.dlang.org/show_bug.cgi?id=15735
                 *   Pull Request: https://github.com/D-Programming-Language/phobos/pull/4030
                 * Work-around: Point to the line. It's an empty string.
                 */
                lineFields[fieldIndex] = line;
                fieldIndex++;
            }

            /* take() caps fieldIndex at fieldIndexEnd, so anything other than equality
             * means the line ran out of fields. The message is only formatted when the
             * check fails (enforce takes it lazily).
             */
            enforce(fieldIndex == fieldIndexEnd,
                    format("Not enough fields in line. File: %s, Line: %s",
                           inputStream.name, lineNum));

            /* Run the tests. Tests will fail (throw) if a field cannot be converted
             * to the expected type.
             */
            try
            {
                bool passed = cmdopt.disjunct ?
                    cmdopt.tests.any!(x => x(lineFields)) :
                    cmdopt.tests.all!(x => x(lineFields));
                if (cmdopt.invert) passed = !passed;

                static if (mode == FilterMode.count)
                {
                    if (passed) ++matchedLines;
                }
                else static if (mode == FilterMode.label)
                {
                    bufferedOutput.appendln(line, cmdopt.delim,
                                            passed ? cmdopt.trueLabel : cmdopt.falseLabel);
                }
                else
                {
                    if (passed) bufferedOutput.appendln(line);
                }
            }
            catch (Exception e)
            {
                /* Push out everything written so far before reporting the failure. */
                static if (mode != FilterMode.count) bufferedOutput.flush;

                /* Add file and line context. A failure on line 1 can only occur when
                 * --header was not given, hence the hint. The original exception is
                 * kept as the next in chain so its details are not lost.
                 */
                throw new Exception(
                    format("Could not process line or field: %s\n  File: %s Line: %s%s",
                           e.msg, inputStream.name, lineNum,
                           (lineNum == 1) ? "\n  Is this a header line? Use --header to skip." : ""),
                    e);
            }
        }
    }

    static if (mode == FilterMode.count) writeln(matchedLines);
}