1 /**
2 A simple version of the unix 'nl' program.
3 
4 This program is a simpler version of the unix 'nl' (number lines) program. It reads
5 text from files or standard input and adds a line number to each line.
6 
7 Copyright (c) 2015-2021, eBay Inc.
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.number_lines;
13 
14 import std.stdio;
15 import std.typecons : tuple;
16 
/** Introductory help text for the program.
 *
 * Passed to defaultGetoptPrinter by NumberLinesOptions.processArgs when help is
 * requested. The text deliberately ends with an "Options:" heading, since the
 * printer is also handed the getopt option list.
 */
auto helpText = q"EOS
Synopsis: number-lines [options] [file...]

number-lines reads from files or standard input and writes each line to standard
output preceded by a line number. It is a simplified version of the unix 'nl'
program. It supports one feature 'nl' does not: the ability to treat the first
line of files as a header. This is useful when working with tab-separated-value
files. If header processing used, a header line is written for the first file,
and the header lines are dropped from any subsequent files.

Examples:
   number-lines myfile.txt
   cat myfile.txt | number-lines --header linenum
   number-lines *.txt

Options:
EOS";
34 
/** Container for command line options.
 *
 * Fields are filled in by `processArgs`. The trailing comment on each option field
 * names the command line option that sets it.
 */
struct NumberLinesOptions
{
    /// Header label used when a header is requested without --header-string.
    enum defaultHeaderString = "line";

    string programName;           /// Derived from cmdArgs[0]; used in error messages.
    bool hasHeader = false;       /// --H|header
    string headerString = "";     /// --s|header-string
    long startNum = 1;            /// --n|start-number
    char delim = '\t';            /// --d|delimiter
    bool lineBuffered = false;    /// --line-buffered
    bool versionWanted = false;   /// --V|version

    /** Parses the command line and populates the option fields.
     *
     * Params:
     *   cmdArgs = The program's command line arguments, program name first. Passed by
     *             reference to getopt, which removes the options it recognizes.
     *
     * Returns: A tuple. First value is true if command line arguments were successfully
     * processed and execution should continue, or false if an error occurred or the user
     * asked for help or version information. If false, the second value is the
     * appropriate exit code (0 or 1).
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            /* The -H and -V options are registered as case sensitive; all other
             * options are case insensitive. (Presumably this keeps -H distinct
             * from getopt's built-in -h help option.)
             */
            auto r = getopt(
                cmdArgs,
                std.getopt.config.caseSensitive,
                "H|header",        "     Treat the first line of each file as a header. The first input file's header is output, subsequent file headers are discarded.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "s|header-string", "STR  String to use in the header row. Implies --header. Default: 'line'", &headerString,
                "n|start-number",  "NUM  Number to use for the first line. Default: 1", &startNum,
                "d|delimiter",     "CHR  Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
                "line-buffered",   "     Immediately output every line.", &lineBuffered,
                std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
            );

            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("number-lines"));
                return tuple(false, 0);
            }

            /* Derivations. A non-empty header string turns header processing on;
             * otherwise the default header label is used.
             */
            if (headerString.length > 0) hasHeader = true;
            else headerString = defaultHeaderString;
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }
}
101 
/* Embed a D runtime option in the executable. "gcopt=cleanup:none" configures the
 * garbage collector's behavior at program termination; per the druntime GC
 * configuration docs, "none" skips the final cleanup pass. The declaration is only
 * compiled in for compiler front-end versions 2.085 and later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
103 
/** Program entry point.
 *
 * Parses the command line, then numbers the lines of the input files named by the
 * arguments following the program name. Returns 0 on success or when help/version
 * output was requested, 1 if argument processing or line numbering failed.
 */
int main(string[] cmdArgs)
{
    /* When running in DMD code coverage mode, turn on report merging. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    NumberLinesOptions cmdopt;
    auto argsResult = cmdopt.processArgs(cmdArgs);

    immutable bool shouldContinue = argsResult[0];
    int exitCode = argsResult[1];

    if (shouldContinue)
    {
        try
        {
            /* Everything after the program name is treated as an input file. */
            numberLines(cmdopt, cmdArgs[1 .. $]);
        }
        catch (Exception exc)
        {
            stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg);
            exitCode = 1;
        }
    }

    return exitCode;
}
126 
/** Implements the primary logic behind number lines.
 *
 * Reads lines from each input file in turn and writes each one to standard output
 * with a line number and the delimiter prepended. Numbering starts at
 * `cmdopt.startNum` and continues across files. When header processing is enabled,
 * the first line of the first file that has one is written with the header string
 * in place of a number; the first line of every later file is dropped.
 *
 * Params:
 *   cmdopt     = Processed command line options.
 *   inputFiles = Files to read. "-" means standard input; if the list is empty,
 *                standard input is read.
 */
void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
{
    import std.conv : to;
    import std.range;
    import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange, LineBuffered, ReadHeader;

    immutable LineBuffered isLineBuffered = cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered;
    immutable ReadHeader useReadHeader = cmdopt.hasHeader ? Yes.readHeader : No.readHeader;

    auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout, isLineBuffered);

    bool headerWritten = false;
    long nextLineNum = cmdopt.startNum;

    foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"])
    {
        auto inputStream = (filename == "-") ? stdin : filename.File();
        foreach (fileLineNum, line;
                 inputStream
                 .bufferedByLine!(KeepTerminator.no)(isLineBuffered, useReadHeader)
                 .enumerate(1))
        {
            immutable bool isHeaderLine = cmdopt.hasHeader && fileLineNum == 1;

            /* Common case: an ordinary data line gets the next number. */
            if (!isHeaderLine)
            {
                bufferedOutput.appendln(nextLineNum.to!string, cmdopt.delim, line);
                nextLineNum++;
                continue;
            }

            /* Header line from a later file: drop it. */
            if (headerWritten) continue;

            bufferedOutput.appendln(cmdopt.headerString, cmdopt.delim, line);
            headerWritten = true;

            /* Flush the header immediately. This helps tasks further on in a
             * unix pipeline detect errors quickly, without waiting for all
             * the data to flow through the pipeline. Note that an upstream
             * task may have flushed its header line, so the header may
             * arrive long before the main block of data.
             */
            bufferedOutput.flush;
        }
    }
}