/**
A simple version of the unix 'nl' program.

Reads text from files or standard input and writes every line back out with a
line number in front of it. Unlike 'nl', the first line of each input can be
treated as a header.

Copyright (c) 2015-2021, eBay Inc.
Initially written by Jon Degenhardt

License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
*/
module tsv_utils.number_lines;

import std.stdio;
import std.typecons : tuple;

/* Text printed ahead of the generated option list when help is requested.
 *
 * NOTE(review): the copy of this file that was reviewed had runs of whitespace
 * collapsed. Spacing inside this text and inside the getopt description strings
 * is reproduced as seen there - confirm against the canonical source.
 */
auto helpText = q"EOS
Synopsis: number-lines [options] [file...]

number-lines reads from files or standard input and writes each line to standard
output preceded by a line number. It is a simplified version of the unix 'nl'
program. It supports one feature 'nl' does not: the ability to treat the first
line of files as a header. This is useful when working with tab-separated-value
files. If header processing used, a header line is written for the first file,
and the header lines are dropped from any subsequent files.

Examples:
number-lines myfile.txt
cat myfile.txt | number-lines --header linenum
number-lines *.txt

Options:
EOS";

/** Holds the command line settings for number-lines.
 */
struct NumberLinesOptions
{
    /// Header text used when --header-string is not supplied.
    enum defaultHeaderString = "line";

    string programName;           /// Derived from cmdArgs[0]; used in error messages.
    bool hasHeader = false;       /// -H|--header
    string headerString = "";     /// -s|--header-string
    long startNum = 1;            /// -n|--start-number
    char delim = '\t';            /// -d|--delimiter
    bool lineBuffered = false;    /// --line-buffered
    bool versionWanted = false;   /// -V|--version

    /** Parses the command line and fills in the option fields.
     *
     * Params:
     *   cmdArgs = Program arguments, program name first. Passed by reference to
     *             getopt; main() treats whatever follows the program name
     *             afterwards as the input files.
     *
     * Returns: A (bool, int) tuple. The bool is true when processing succeeded and
     *   the program should continue. It is false when help or version output was
     *   printed (exit code 0) or when argument processing threw (exit code 1); the
     *   int is that exit code.
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.algorithm : any, each;
        import std.getopt;
        import std.path : baseName, stripExtension;

        if (cmdArgs.length > 0) programName = cmdArgs[0].stripExtension.baseName;
        else programName = "Unknown_program_name";

        try
        {
            /* The caseSensitive / caseInsensitive markers are positional: each one
             * governs the options listed after it, so the ordering must be kept.
             */
            auto parsed = getopt(
                cmdArgs,
                std.getopt.config.caseSensitive,
                "H|header", " Treat the first line of each file as a header. The first input file's header is output, subsequent file headers are discarded.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "s|header-string", "STR String to use in the header row. Implies --header. Default: 'line'", &headerString,
                "n|start-number", "NUM Number to use for the first line. Default: 1", &startNum,
                "d|delimiter", "CHR Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
                "line-buffered", " Immediately output every line.", &lineBuffered,
                std.getopt.config.caseSensitive,
                "V|version", " Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            /* Help takes precedence over version when both are requested. */
            if (parsed.helpWanted)
            {
                defaultGetoptPrinter(helpText, parsed.options);
                return tuple(false, 0);
            }

            if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("number-lines"));
                return tuple(false, 0);
            }

            /* Derived settings: with no header string given, fall back to the
             * default text; an explicit header string switches header mode on.
             */
            if (headerString.length == 0) headerString = defaultHeaderString;
            else hasHeader = true;
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }

        return tuple(true, 0);
    }
}

/* D runtime option string, declared only for compilers with __VERSION__ >= 2085.
 * NOTE(review): "gcopt=cleanup:none" presumably skips the garbage collector's
 * final cleanup at program exit - confirm against the druntime GC configuration
 * documentation.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];

/** Program entry point.
 *
 * Processes the command line, then numbers the lines of the inputs named by the
 * remaining arguments.
 *
 * Returns: The exit code from argument processing when it asks to stop (0 after
 *   help/version, 1 on an argument error), 1 if numbering throws an Exception,
 *   otherwise 0.
 */
int main(string[] cmdArgs)
{
    /* In DMD code coverage builds, merge coverage reports across runs. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    NumberLinesOptions options;
    auto argStatus = options.processArgs(cmdArgs);
    immutable bool shouldContinue = argStatus[0];
    if (!shouldContinue) return argStatus[1];

    try
    {
        /* Everything after the program name is taken as an input file. */
        numberLines(options, cmdArgs[1 .. $]);
    }
    catch (Exception exc)
    {
        stderr.writefln("Error [%s]: %s", options.programName, exc.msg);
        return 1;
    }

    return 0;
}

/** Core of the program: copies each input line to stdout with a number prepended.
 *
 * Numbering starts at cmdopt.startNum and continues across all inputs. When
 * cmdopt.hasHeader is set, the first line of every input is treated as a header:
 * the first one encountered is written with cmdopt.headerString in place of a
 * number, the rest are dropped. Header lines never consume a line number.
 *
 * Params:
 *   cmdopt     = Processed command line options.
 *   inputFiles = Files to read; "-" means standard input. When empty, standard
 *                input is read.
 */
void numberLines(const NumberLinesOptions cmdopt, const string[] inputFiles)
{
    import std.conv : to;
    import std.range;
    import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange, LineBuffered, ReadHeader;

    /* Translate the boolean options into the flag types taken by the helpers. */
    immutable ReadHeader readHeaderFlag = cmdopt.hasHeader ? Yes.readHeader : No.readHeader;
    immutable LineBuffered lineBufferedFlag = cmdopt.lineBuffered ? Yes.lineBuffered : No.lineBuffered;

    auto output = BufferedOutputRange!(typeof(stdout))(stdout, lineBufferedFlag);

    bool headerWritten = false;
    long nextNum = cmdopt.startNum;

    /* No files named on the command line means: read standard input. */
    auto sources = (inputFiles.length > 0) ? inputFiles : ["-"];

    foreach (filename; sources)
    {
        auto inputStream = (filename == "-") ? stdin : File(filename);

        /* Pair every line with its 1-based position inside the current input. */
        auto numberedLines =
            inputStream
            .bufferedByLine!(KeepTerminator.no)(lineBufferedFlag, readHeaderFlag)
            .enumerate(1);

        foreach (fileLineNum, line; numberedLines)
        {
            immutable bool isHeaderLine = cmdopt.hasHeader && fileLineNum == 1;

            if (!isHeaderLine)
            {
                output.appendln(nextNum.to!string, cmdopt.delim, line);
                nextNum++;
                continue;
            }

            /* Only the first header seen is emitted; later ones are dropped. */
            if (headerWritten) continue;

            output.appendln(cmdopt.headerString, cmdopt.delim, line);
            headerWritten = true;

            /* Push the header out right away so that later stages of a unix
             * pipeline can detect errors without waiting for the data to flow
             * through. An upstream task may likewise have flushed its header,
             * so the header can arrive well ahead of the main block of data.
             */
            output.flush;
        }
    }
}