/**
A simple version of the unix 'nl' program.

This program is a simpler version of the unix 'nl' (number lines) program. It reads
text from files or standard input and adds a line number to each line.

Copyright (c) 2015-2018, eBay Software Foundation
Initially written by Jon Degenhardt

License: Boost Licence 1.0 (http://boost.org/LICENSE_1_0.txt)
*/
module tsv_utils.number_lines;

import std.stdio;
import std.typecons : tuple;

/** Usage text printed ahead of the generated option list when help is requested. */
auto helpText = q"EOS
Synopsis: number-lines [options] [file...]

number-lines reads from files or standard input and writes each line to standard
output preceded by a line number. It is a simplified version of the unix 'nl'
program. It supports one feature 'nl' does not: the ability to treat the first
line of files as a header. This is useful when working with tab-separated-value
files. If header processing used, a header line is written for the first file,
and the header lines are dropped from any subsequent files.

Examples:
   number-lines myfile.txt
   cat myfile.txt | number-lines --header linenum
   number-lines *.txt

Options:
EOS";

/** Container for command line options.
 *
 * Fields hold the defaults until processArgs overwrites them from the command line.
 */
struct NumberLinesOptions
{
    /// Header text used when header processing is on and no --header-string was given.
    enum defaultHeaderString = "line";

    string programName;            /// Derived from cmdArgs[0]; used in error messages.
    bool hasHeader = false;        // -H, --header
    string headerString = "";      // -s, --header-string
    long startNum = 1;             // -n, --start-number
    char delim = '\t';             // -d, --delimiter
    bool versionWanted = false;    // -V, --version

    /** Parses the command line into this struct.
     *
     * Recognized options are removed from cmdArgs by getopt; whatever is left after
     * cmdArgs[0] is treated by the caller as the list of input files.
     *
     * Returns: A tuple. The first value is true if the command line arguments were
     * successfully processed and execution should continue, or false if an error
     * occurred or the user asked for help or version information. If false, the
     * second value is the appropriate exit code (0 or 1).
     */
    auto processArgs (ref string[] cmdArgs)
    {
        import std.getopt;
        import std.path : baseName, stripExtension;

        programName = (cmdArgs.length > 0) ? cmdArgs[0].stripExtension.baseName : "Unknown_program_name";

        try
        {
            /* -H and -V are matched case-sensitively; the other options are
             * registered while case-insensitive matching is in effect.
             */
            auto r = getopt(
                cmdArgs,
                std.getopt.config.caseSensitive,
                "H|header", " Treat the first line of each file as a header. The first input file's header is output, subsequent file headers are discarded.", &hasHeader,
                std.getopt.config.caseInsensitive,
                "s|header-string", "STR String to use in the header row. Implies --header. Default: 'line'", &headerString,
                "n|start-number", "NUM Number to use for the first line. Default: 1", &startNum,
                "d|delimiter", "CHR Character appended to line number, preceding the rest of the line. Default: TAB (Single byte UTF-8 characters only.)", &delim,
                std.getopt.config.caseSensitive,
                "V|version", " Print version information and exit.", &versionWanted,
                std.getopt.config.caseInsensitive,
                );

            if (r.helpWanted)
            {
                defaultGetoptPrinter(helpText, r.options);
                return tuple(false, 0);
            }
            else if (versionWanted)
            {
                import tsv_utils.common.tsvutils_version;
                writeln(tsvutilsVersionNotice("number-lines"));
                return tuple(false, 0);
            }

            /* Derivations: a non-empty header string turns header processing on;
             * otherwise fall back to the default header text.
             */
            if (headerString.length > 0) hasHeader = true;
            else headerString = defaultHeaderString;
        }
        catch (Exception exc)
        {
            stderr.writefln("[%s] Error processing command line arguments: %s", programName, exc.msg);
            return tuple(false, 1);
        }
        return tuple(true, 0);
    }
}

/* Passes the "gcopt=cleanup:none" setting to the D runtime. Only declared for
 * compiler versions 2.085 and later.
 */
static if (__VERSION__ >= 2085) extern(C) __gshared string[] rt_options = [ "gcopt=cleanup:none" ];

/** Main program.
 *
 * Processes the command line, then numbers the lines of the remaining arguments
 * (input files).
 *
 * Returns: 0 on success (including help/version requests), 1 if argument processing
 * failed or an exception was thrown while numbering lines.
 */
int main(string[] cmdArgs)
{
    /* When running in DMD code coverage mode, turn on report merging. */
    version(D_Coverage) version(DigitalMars)
    {
        import core.runtime : dmd_coverSetMerge;
        dmd_coverSetMerge(true);
    }

    NumberLinesOptions cmdopt;
    auto r = cmdopt.processArgs(cmdArgs);
    if (!r[0]) return r[1];

    /* cmdArgs[0] is the program name; everything after it is an input file. */
    try numberLines(cmdopt, cmdArgs[1..$]);
    catch (Exception exc)
    {
        stderr.writefln("Error [%s]: %s", cmdopt.programName, exc.msg);
        return 1;
    }

    return 0;
}

/** Implements the primary logic behind number lines.
 *
 * Reads lines from each file, outputting each with a line number and the delimiter
 * prepended. Numbering starts at cmdopt.startNum and continues across files. A file
 * name of "-" means standard input; standard input is also used when inputFiles is
 * empty.
 *
 * When cmdopt.hasHeader is set, the first line of every file is treated as a header:
 * the first one encountered is written, prefixed with cmdopt.headerString and the
 * delimiter; headers from subsequent files are dropped. Header lines are not numbered.
 *
 * Params:
 *   cmdopt     = Processed command line options.
 *   inputFiles = Files to read, in order.
 */
void numberLines(in NumberLinesOptions cmdopt, in string[] inputFiles)
{
    import std.conv : to;
    import std.range;
    import tsv_utils.common.utils : bufferedByLine, BufferedOutputRange;

    auto bufferedOutput = BufferedOutputRange!(typeof(stdout))(stdout);

    long lineNum = cmdopt.startNum;
    bool headerWritten = false;
    foreach (filename; (inputFiles.length > 0) ? inputFiles : ["-"])
    {
        auto inputStream = (filename == "-") ? stdin : filename.File();

        /* enumerate(1) gives a per-file, 1-based line index, so index 1 identifies
         * each file's header line.
         */
        foreach (fileLineNum, line; inputStream.bufferedByLine!(KeepTerminator.no).enumerate(1))
        {
            if (cmdopt.hasHeader && fileLineNum == 1)
            {
                if (!headerWritten)
                {
                    bufferedOutput.append(cmdopt.headerString);
                    bufferedOutput.append(cmdopt.delim);
                    bufferedOutput.appendln(line);
                    headerWritten = true;
                }
            }
            else
            {
                bufferedOutput.append(lineNum.to!string);
                bufferedOutput.append(cmdopt.delim);
                bufferedOutput.appendln(line);
                lineNum++;
            }
        }
    }
}