1 /**
2 A simple version of the unix 'nl' program.
3 
4 This program is a simpler version of the unix 'nl' (number lines) program. It reads
5 text from files or standard input and adds a line number to each line.
6 
7 Copyright (c) 2015-2018, eBay Software Foundation
8 Initially written by Jon Degenhardt
9 
10 License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
11 */
12 module tsv_utils.number_lines;
13 
14 import std.stdio;
15 import std.typecons : tuple;
16 
/** Help text printed ahead of the generated option list when help is requested.
 *
 * Declared immutable because the text is a constant that is never modified at
 * run time. It still converts implicitly to `string` where one is expected.
 */
immutable helpText = q"EOS
Synopsis: number-lines [options] [file...]

number-lines reads from files or standard input and writes each line to standard
output preceded by a line number. It is a simplified version of the unix 'nl'
program. It supports one feature 'nl' does not: the ability to treat the first
line of files as a header. This is useful when working with tab-separated-value
files. If header processing used, a header line is written for the first file,
and the header lines are dropped from any subsequent files.

Examples:
   number-lines myfile.txt
   cat myfile.txt | number-lines --header linenum
   number-lines *.txt

Options:
EOS";
34 
/** Container for command line options.
 *
 * The fields hold the option values, initialized to their defaults. Calling
 * `processArgs` overwrites them with whatever was given on the command line.
 */
struct NumberLinesOptions
{
    /* Header label used when header processing is on and no --header-string was given. */
    enum defaultHeaderString = "line";

    string programName;           // Derived from cmdArgs[0]; used in error messages.
    bool hasHeader = false;       // --H|header
    string headerString = "";     // --s|header-string
    long startNum = 1;            // --n|start-number
    char delim = '\t';            // --d|delimiter
    bool versionWanted = false;   // --V|version

    /** Parses the command line and fills in the option fields.
     *
     * Params:
     *   cmdArgs = The program arguments, program name first. Passed by reference
     *             because getopt removes the options it recognizes, leaving the
     *             program name followed by the non-option arguments.
     *
     * Returns: A tuple. The first value is true if the command line arguments were
     * successfully processed and execution should continue, or false if an error
     * occurred or the user asked for help or version information. If false, the
     * second value is the appropriate exit code (0 or 1).
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            /* The single letter forms -H and -V are matched case sensitively; the
             * remaining options are matched case insensitively.
             */
            auto r = getopt(
                cmdArgs,
                std.getopt.config.caseSensitive,
                "H|header",        "     Treat the first line of each file as a header. The first input file's header is output, subsequent file headers are discarded.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "s|header-string", "STR  String to use in the header row. Implies --header. Default: 'line'", &headerString,
                "n|start-number",  "NUM  Number to use for the first line. Default: 1", &startNum,
                "d|delimiter",     "CHR  Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
                std.getopt.config.caseSensitive,
                "V|version",       "     Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
            );

            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("number-lines"));
                return tuple(false, 0);
            }

            /* Derivations: a non-empty header string turns header processing on,
             * otherwise the default header label is used.
             */
            if (headerString.length > 0) hasHeader = true;
            else headerString = defaultHeaderString;
        }
        catch (Exception exc)
        {
            /* Invalid options and value conversion failures are reported here. */
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }
}
99 
/* Embedded D runtime option, defined only for compiler front-end versions 2.085
 * and later. "gcopt=cleanup:none" asks the garbage collector to do no cleanup of
 * live objects when the program terminates.
 */
static if (__VERSION__ >= 2085)
{
    extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];
}
101 
/** Program entry point.
 *
 * Processes the command line, then numbers the lines of the remaining arguments
 * (treated as input files). Returns 0 on success and 1 if an exception was raised
 * while numbering lines. When argument processing indicates execution should not
 * continue, the exit code it supplied is returned.
 */
int main(string[] cmdArgs)
{
    /* When running in DMD code coverage mode, turn on report merging. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    NumberLinesOptions cmdopt;
    auto argStatus = cmdopt.processArgs(cmdArgs);
    if (!argStatus[0])
    {
        return argStatus[1];
    }

    int exitCode = 0;
    try
    {
        /* cmdArgs[0] is the program name; everything after it is an input file. */
        numberLines(cmdopt, cmdArgs[1..$]);
    }
    catch (Exception exc)
    {
        stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg);
        exitCode = 1;
    }

    return exitCode;
}
124 
/** Implements the primary logic behind number-lines.
 *
 * Reads lines from each input file in turn and writes each one to standard output
 * with a line number and the delimiter prepended. Numbering starts at
 * `cmdopt.startNum` and continues across files. If no files are given, or a file
 * is named "-", standard input is read.
 *
 * When header processing is enabled, the first line of each file is treated as a
 * header. The first header encountered is written, prefixed by the header string
 * instead of a number. Headers from subsequent files are dropped.
 */
void numberLines(in NumberLinesOptions cmdopt, in string[] inputFiles)
{
    import std.conv : to;
    import std.range;
    import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange;

    auto outputSink = BufferedOutputRange!(typeof(stdout))(stdout);

    long nextNumber = cmdopt.startNum;
    bool headerEmitted = false;

    foreach (path; (inputFiles.length > 0) ? inputFiles : ["-"])
    {
        auto source = (path == "-") ? stdin : path.File();

        foreach (lineInFile, text; source.bufferedByLine!(KeepTerminator.no).enumerate(1))
        {
            const isHeaderLine = cmdopt.hasHeader && lineInFile == 1;

            if (isHeaderLine)
            {
                /* Only the first header seen is written; later ones are skipped. */
                if (headerEmitted) continue;

                outputSink.append(cmdopt.headerString);
                headerEmitted = true;
            }
            else
            {
                outputSink.append(nextNumber.to!string);
                nextNumber++;
            }

            /* The delimiter and the line itself follow either kind of prefix. */
            outputSink.append(cmdopt.delim);
            outputSink.appendln(text);
        }
    }
}