File: box/coby.go 1 package main 2 3 import ( 4 "errors" 5 "io" 6 "io/fs" 7 "os" 8 "path/filepath" 9 "runtime" 10 "strconv" 11 "sync" 12 ) 13 14 // cobyHeader is the first output line 15 var cobyHeader = []string{ 16 `name`, 17 `bytes`, 18 `runes`, 19 `lines`, 20 `lf`, 21 `crlf`, 22 `spaces`, 23 `tabs`, 24 `trails`, 25 `nulls`, 26 `fulls`, 27 `highs`, 28 } 29 30 // cobyEvent has what the output-reporting task needs to show the results of 31 // a task which has just completed, perhaps unsuccessfully 32 type cobyEvent struct { 33 // Index points to the task's entry in the results-slice 34 Index int 35 36 // Stats has all the byte-related stats 37 Stats cobyStats 38 39 // Err is the completed task's error, or lack of 40 Err error 41 } 42 43 func coby(w writer, r io.Reader, folders []string) error { 44 // show first/heading line right away, to let users know things are 45 // happening 46 for i, s := range cobyHeader { 47 if i > 0 { 48 w.WriteByte('\t') 49 } 50 w.WriteString(s) 51 } 52 // assume an error means later stages/apps in a pipe had enough input and 53 // quit successfully, so quit successfully too 54 if err := endLine(w); err != nil { 55 return err 56 } 57 58 // names has all filepaths given, ignoring repetitions 59 names, ok := findAllFiles(uniqueStrings(folders)) 60 if !ok { 61 return multipleErrors{} 62 } 63 if len(names) == 0 { 64 names = []string{`-`} 65 } 66 67 events := make(chan cobyEvent) 68 go cobyHandleInputs(names, events, r) 69 if !cobyHandleOutput(w, len(names), events) { 70 return multipleErrors{} 71 } 72 return nil 73 } 74 75 // cobyHandleInputs launches all the tasks which do the actual work, limiting 76 // how many inputs are being worked on at the same time 77 func cobyHandleInputs(names []string, events chan cobyEvent, r io.Reader) { 78 // allow output-reporter task to end, and thus the app 79 defer close(events) 80 81 // permissions limits how many worker tasks can be active at the same 82 // time: when given many filepaths to work on, 
rate-limiting avoids 83 // a massive number of concurrent tasks which read and process input 84 permissions := make(chan struct{}, runtime.NumCPU()) 85 defer close(permissions) 86 87 var inputs sync.WaitGroup 88 for i := range names { 89 // wait until some concurrency-room is available 90 permissions <- struct{}{} 91 inputs.Add(1) 92 93 go func(i int) { 94 defer inputs.Done() 95 res, err := cobyHandleInput(names[i], r) 96 events <- cobyEvent{i, res, err} 97 <-permissions 98 }(i) 99 } 100 101 // wait for all inputs, before closing the `events` channel 102 inputs.Wait() 103 } 104 105 // cobyHandleInput handles each work-item for func handleInputs 106 func cobyHandleInput(path string, r io.Reader) (cobyStats, error) { 107 var res cobyStats 108 res.name = path 109 110 if path == `-` { 111 err := res.updateStats(r) 112 return res, err 113 } 114 115 f, err := os.Open(path) 116 if err != nil { 117 res.result = resultError 118 // on windows, file-not-found error messages may mention `CreateFile`, 119 // even when trying to open files in read-only mode 120 return res, errors.New(`can't open file named ` + path) 121 } 122 defer f.Close() 123 124 err = res.updateStats(f) 125 return res, err 126 } 127 128 // cobyHandleOutput asynchronously updates output as results are known, whether 129 // it's errors or successful results; returns whether it succeeded, which 130 // means no errors happened 131 func cobyHandleOutput(w writer, rescount int, events chan cobyEvent) (ok bool) { 132 results := make([]cobyStats, rescount) 133 134 // keep track of which tasks are over, so that on each event all leading 135 // results which are ready are shown: all of this ensures prompt output 136 // updates as soon as results come in, while keeping the original order 137 // of the names/filepaths given 138 resultsLeft := results 139 140 for v := range events { 141 results[v.Index] = v.Stats 142 if v.Err != nil { 143 ok = false 144 w.Flush() 145 showError(v.Err) 146 147 // stay in the current loop, 
in case this failure was keeping 148 // previous successes from showing up 149 } 150 151 n := countLeadingReady(resultsLeft) 152 153 for _, res := range resultsLeft[:n] { 154 if err := cobyShowResult(w, res); err != nil { 155 // assume later stages/apps in a pipe had enough input and 156 // quit successfully, so quit successfully too 157 return true 158 } 159 } 160 resultsLeft = resultsLeft[n:] 161 162 // flush output-buffer only if anything new was shown 163 if n > 0 { 164 w.Flush() 165 } 166 } 167 168 return ok 169 } 170 171 // cobyShowResult does what it says 172 func cobyShowResult(w writer, res cobyStats) error { 173 if res.result == resultError { 174 return nil 175 } 176 177 var buf [64]byte 178 w.WriteString(res.name) 179 w.Write([]byte{'\t'}) 180 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 181 w.Write([]byte{'\t'}) 182 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 183 w.Write([]byte{'\t'}) 184 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 185 w.Write([]byte{'\t'}) 186 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 187 w.Write([]byte{'\t'}) 188 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 189 w.Write([]byte{'\t'}) 190 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 191 w.Write([]byte{'\t'}) 192 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 193 w.Write([]byte{'\t'}) 194 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 195 w.Write([]byte{'\t'}) 196 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 197 w.Write([]byte{'\t'}) 198 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 199 w.Write([]byte{'\t'}) 200 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 201 _, err := w.Write([]byte{'\n'}) 202 return err 203 } 204 205 // uniqueStrings ensures items only appear once in the result, keeping the 206 // original slice unchanged 207 func uniqueStrings(src []string) []string { 208 var unique []string 209 got := 
make(map[string]struct{}) 210 for _, s := range src { 211 if _, ok := got[s]; ok { 212 continue 213 } 214 unique = append(unique, s) 215 got[s] = struct{}{} 216 } 217 return unique 218 } 219 220 // findAllFiles does what it says, given a mix of file/folder paths, finding 221 // all files recursively in the case of folders 222 func findAllFiles(paths []string) (found []string, ok bool) { 223 var unique []string 224 got := make(map[string]struct{}) 225 ok = true 226 227 for _, root := range paths { 228 // a dash means standard input 229 if root == `-` { 230 if _, ok := got[root]; ok { 231 continue 232 } 233 234 unique = append(unique, root) 235 got[root] = struct{}{} 236 continue 237 } 238 239 _, err := os.Stat(root) 240 if os.IsNotExist(err) { 241 ok = false 242 // on windows, file-not-found error messages may mention `CreateFile`, 243 // even when trying to open files in read-only mode 244 err := errors.New(`can't find file/folder named ` + root) 245 showError(err) 246 continue 247 } 248 249 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 250 if err != nil { 251 return err 252 } 253 254 if d.IsDir() { 255 return nil 256 } 257 258 if _, ok := got[path]; ok { 259 return nil 260 } 261 262 unique = append(unique, path) 263 got[path] = struct{}{} 264 return nil 265 }) 266 267 if err != nil { 268 ok = false 269 showError(err) 270 } 271 } 272 273 return unique, ok 274 } 275 276 // isZero enables branchless-counting, when xor-compared bytes are used 277 // as indices for it 278 var isZero = [256]byte{1} 279 280 // counter makes it easy to change the int-size of almost all counters 281 type counter int 282 283 // cobyStatResult constrains possible result-states/values in type stats 284 type cobyStatResult int 285 286 const ( 287 // resultPending is the default not-yet-ready result-status 288 resultPending = cobyStatResult(0) 289 290 // resultError signals result should show as an error, instead of data 291 resultError = cobyStatResult(1) 292 
293 // resultSuccess means result can be shown 294 resultSuccess = cobyStatResult(2) 295 ) 296 297 // cobyStats has all the size-cobyStats for some input, as well as a way to 298 // skip showing results, in case of an error such as `file not found` 299 type cobyStats struct { 300 // bytes counts all bytes read 301 bytes int 302 303 // lines counts lines, and is 0 only when the byte-count is also 0 304 lines counter 305 306 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 307 // is usually a complete symbol: `emoji` country-flags are commonly-used 308 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 309 runes counter 310 311 // maxWidth is maximum byte-width of lines, excluding carriage-returns 312 // and/or line-feeds 313 maxWidth counter 314 315 // nulls counts all-bits-off bytes 316 nulls counter 317 318 // fulls counts all-bits-on bytes 319 fulls counter 320 321 // highs counts bytes with their `top` (highest-order) bit on 322 highs counter 323 324 // spaces counts ASCII spaces 325 spaces counter 326 327 // tabs counts ASCII tabs 328 tabs counter 329 330 // trailing counts lines with trailing spaces in them 331 trailing counter 332 333 // lf counts ASCII line-feeds as their own byte-values: this means its 334 // value will always be at least the same as field `crlf` 335 lf counter 336 337 // crlf counts ASCII CRLF byte-pairs 338 crlf counter 339 340 // name is the filepath of the file/source these stats are about 341 name string 342 343 // results keeps track of whether results are valid and/or ready 344 result cobyStatResult 345 } 346 347 // updateStats does what it says, reading everything from a reader 348 func (res *cobyStats) updateStats(r io.Reader) error { 349 err := res.updateUsing(r) 350 if err == io.EOF { 351 err = nil 352 } 353 354 if err == nil { 355 res.result = resultSuccess 356 } else { 357 res.result = resultError 358 } 359 return err 360 } 361 362 // updateUsing helps func updateStats do its job 
363 func (res *cobyStats) updateUsing(r io.Reader) error { 364 var width counter 365 var highRun int 366 var prev1, prev2 byte 367 var buf [16 * 1024]byte 368 var tallies [256]uint64 369 370 for { 371 n, err := r.Read(buf[:]) 372 if n < 1 { 373 if err == io.EOF { 374 res.tabs = counter(tallies['\t']) 375 res.spaces = counter(tallies[' ']) 376 res.lf = counter(tallies['\n']) 377 res.nulls = counter(tallies[0]) 378 res.fulls = counter(tallies[255]) 379 for i := 128; i < 256; i++ { 380 res.highs += counter(tallies[i]) 381 } 382 return res.handleEnd(width, prev1, highRun) 383 } 384 return err 385 } 386 387 res.bytes += n 388 chunk := buf[:n] 389 390 for _, b := range chunk { 391 // count values without branching, because it's fun 392 tallies[b]++ 393 394 // handle non-ASCII runes, assuming input is valid UTF-8 395 if b >= 128 { 396 if highRun < 3 { 397 highRun++ 398 } else { 399 highRun = 0 400 res.runes++ 401 width++ 402 } 403 404 prev2 = prev1 405 prev1 = b 406 continue 407 } 408 409 // handle line-feeds 410 if b == '\n' { 411 res.lines++ 412 413 crlf := count(prev1, '\r') 414 res.crlf += crlf 415 416 // count lines with trailing spaces, whether these end with 417 // a CRLF byte-pair or just a line-feed byte 418 res.trailing += count(prev1, ' ') 419 res.trailing += crlf & count(prev2, ' ') 420 421 // exclude any CR from the current line's width-count 422 width -= crlf 423 if res.maxWidth < width { 424 res.maxWidth = width 425 } 426 427 prev2 = prev1 428 prev1 = b 429 430 res.runes++ 431 highRun = 0 432 width = 0 433 continue 434 } 435 436 prev2 = prev1 437 prev1 = b 438 439 res.runes++ 440 highRun = 0 441 width++ 442 } 443 } 444 } 445 446 // handleEnd fixes/finalizes stats when input data end; this func is only 447 // meant to be used by func updateStats, since it takes some of the latter's 448 // local variables 449 func (res *cobyStats) handleEnd(width counter, prev1 byte, highRun int) error { 450 if prev1 == ' ' { 451 res.trailing++ 452 } 453 454 if res.maxWidth < 
width { 455 res.maxWidth = width 456 } 457 458 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 459 // standard cmd-line tool `wc` 460 if res.bytes > 0 && prev1 != '\n' { 461 res.lines++ 462 } 463 464 if highRun > 0 { 465 res.runes++ 466 } 467 return nil 468 } 469 470 // count checks if 2 bytes are the same, returning either 0 or 1, which can 471 // be added directly/branchlessly to totals 472 func count(x, y byte) counter { 473 return counter(isZero[x^y]) 474 } 475 476 // countLeadingReady finds how many items are ready to show at the start of a 477 // results-slice, which ensures output matches the original item-order 478 func countLeadingReady(values []cobyStats) int { 479 for i, v := range values { 480 if v.result == resultPending { 481 return i 482 } 483 } 484 return len(values) 485 } File: box/coby_stats.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]cobyStats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected cobyStats 48 }{ 49 { 50 ``, 51 cobyStats{}, 52 }, 53 { 54 `abc`, 55 cobyStats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 cobyStats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 cobyStats{lines: 1, 
runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 cobyStats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got cobyStats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 } File: box/deflac.go 1 package main 2 3 import ( 4 "errors" 5 "io" 6 ) 7 8 // deflac eventually wants to be a streaming FLAC-to-WAV audio decoder 9 func deflac(w writer, r io.Reader) error { 10 // return emitTone(w, 2.0, 440) 11 return errors.New(`not ready / this tool seems a crazy idea`) 12 } File: box/help.go 1 package main 2 3 import ( 4 "errors" 5 "io" 6 "sort" 7 "strings" 8 "unicode/utf8" 9 ) 10 11 var name2help = map[string]string{ 12 `abs`: `abs 13 Turn all lines from (assumed) filepath names into absolute paths`, 14 15 `args`: `args [args...] 16 Emit each argument given on its own line`, 17 18 `base`: `base 19 Transform each line, which is assumed to be a filepath, by only keeping the 20 final part of the (assumed) filepath, sometimes called the "base-name"`, 21 22 `base64`: `base64 23 Turn general bytes into a base64-encoded line of text`, 24 25 `begin`: `begin [lines...] 26 Start output with the lines given as arguments, followed by the input lines`, 27 28 `begintsv`: `begintsv [items...] 
29 Start output with a line of tab-separated values (TSV), followed by the input 30 lines, which are (presumably) TSV-type lines too`, 31 32 `bh`: `bh [every = 5] 33 Breathe Header is like Breathe Lines (bl), except it also adds an extra empty 34 line after the first/header line; when not given a number, the default is to 35 breathe every 5 lines, starting after the first line`, 36 37 `bl`: `bl [every = 5] 38 Breathe Lines adds an extra empty line every few, so that long blocks of text 39 lines can visually "breathe"; when not given a number, the default is every 5 40 lines`, 41 42 `book`: `book [page height] 43 Lay out lines on 2 side-by-side columns, making the result look like pairs of 44 facing pages in a book`, 45 46 `bytefreq`: `bytefreq 47 Show the frequency of each byte read from the main input`, 48 49 `choplf`: `choplf 50 Avoid emitting the last line-feed, if present in the original input`, 51 52 `coby`: `coby [folders...] 53 COunt BYtes is a tool which finds various byte-related stats/counts from all 54 files (found recursively) in all the folder names given; results are emitted 55 as TSV lines, where the first line is a label/header line`, 56 57 `compose`: `compose [separator] [tools/args/seps...] 58 Run multiple tools concurrently, chaining each tool's input to the previous 59 tool's output; the result is similar to how shell pipes work, except that no 60 external processes/tasks are involved, as everything happens inside this app`, 61 62 `datauri`: `datauri [mime type...] 
63 Encode input bytes into their (base64-encoded) data-URI representation: when 64 not given an explicit MIME-type (or single-word shortcut for it), this tool 65 tries to auto-detect the media/data-type from the first few bytes, since all 66 data-URIs start by declaring a MIME-type for their base64 payload`, 67 68 `debase64`: `debase64 69 Decode base64-encoded text into general bytes`, 70 71 `debz`: `debz 72 Decode bzip2-compressed data`, 73 74 `decsv`: `decsv 75 Turn CSV lines into a "JSONS" (JSON Strings) array, which is an array of JSON 76 objects where the only values are JSON strings; the only other type of value 77 is null, which is reserved for missing trailing row-values`, 78 79 `deflac`: `deflac 80 A tool which eventually wants to be a streaming FLAC-to-WAV audio decoder`, 81 82 `degz`: `degz 83 Decode gzip-compressed data`, 84 85 `dejsonl`: `dejsonl 86 Convert JSON Lines (JSONL) into a proper JSON array: while JSONL isn't valid 87 JSON as a whole, each line in it is valid JSON, a property which allows its 88 use for log-style line-based JSON-streaming applications`, 89 90 `delay`: `delay [seconds] 91 Delay input lines one by one, by waiting the number of seconds given before 92 emitting each line: the delay given (in seconds) can be a round number, or 93 a decimal number`, 94 95 `detab`: `detab [tabstop = 4] 96 Expand all tabs using runs of spaces up to the number given; when not given 97 a number, the default is to use up to 4 spaces for each tab`, 98 99 `dir`: `dir 100 Transform each line, which is assumed to be a filepath, by only keeping the 101 directory/folder part of the (assumed) filepath`, 102 103 `div`: `div [x] [y...] 104 Divide 2 numbers in 3 ways, showing their proper fraction, its reciprocal, 105 and the complement of the proper fraction; when given just 1 number, its 106 reciprocal is shown`, 107 108 `drop`: `drop [what...] 
109 Ignore all occurrences of all substrings given, where matching is always 110 case-sensitive, and each substring is fully "dropped" before doing the same 111 with later ones, in case multiple args are given`, 112 113 `end`: `end [lines...] 114 Start output with the input lines, ending with the lines given as arguments`, 115 116 `endtsv`: `endtsv [items...] 117 Start output with the input lines, which are (presumably) TSV-type lines, 118 ending with a line of tab-separated values (TSV)`, 119 120 `files`: `files [folders...] 121 Find all files in all the folders given, recursively; when not given any 122 folder name, the current folder is searched by default`, 123 124 `first`: `first [lines = 1] 125 Keep only up to the first n lines`, 126 127 `folders`: `folders [folders...] 128 Find all folders in all the folders given, recursively; when not given any 129 folder name, the current folder is searched by default`, 130 131 `gz`: `gz 132 Gzip-compress input bytes`, 133 134 `help`: `help [tools...] 
135 Show help messages for all the tool names given; when not given any names, it 136 shows the app's main help message, followed by the help messages for all the 137 tools in it`, 138 139 `hex`: `hex 140 Encode input bytes into a line of ASCII-based hexadecimal symbols`, 141 142 `hold`: `hold 143 Read all input bytes, holding everything, and start copying them all into 144 the main output only after the last input byte was read`, 145 146 `id3pic`: `id3pic 147 Isolate thumbnail (picture) payload from a media stream; embedded ID3-format 148 pictures are often part of MP3 streams, especially "podcast" episodes`, 149 150 `identity`: `identity 151 Copy all input bytes, exactly as given`, 152 153 `indent`: `indent [spaces = 2] 154 Indent non-empty lines with the leading number of spaces given; when not 155 given a number, the default is to indent using 2 leading spaces`, 156 157 `items`: `items 158 Split items/words from each line, emitting each item into its own line: items 159 are split by 1 or more spaces and/or tabs`, 160 161 `join`: `join [separator] 162 Join all input lines into a single output line, separating its items with the 163 string given as the tool's only argument; to join input lines into a single 164 line of tab-separated values, use tool "lineup" with 0 or a negative value`, 165 166 `json0`: `json0 167 Squeeze JSON input into a smaller payload, ignoring unneeded spaces; this 168 tool also adapts almost-JSON into valid JSON, as it ignores trailing commas 169 in arrays and objects, comments (not allowed in JSON), and even turns strings 170 surrounded by single quotes into double-quoted strings`, 171 172 `jsonl`: `jsonl 173 Convert proper JSON into JSON Lines (JSONL): while JSONL isn't valid JSON as 174 a whole, each line in it is valid JSON, a property which allows its use for 175 log-style line-based JSON-streaming applications`, 176 177 `junk`: `junk [size] 178 Emit the number of pseudo-random bytes given`, 179 180 `last`: `last [lines = 1] 181 Keep 
only up to the last n lines`, 182 183 `leak`: `leak [style = plain] 184 Emit input lines both to stderr and stdout, thus "leaking" contents along a 185 shell pipe; this tool can help debug pipes involving many steps`, 186 187 `limit`: `limit [max bytes] 188 Limit data to the maximum byte-count given`, 189 190 `lines`: `lines 191 Ignore leading UTF-8 BOM (Byte-Order Mark) on the first line, if present, and 192 any trailing carriage-returns at the end of each line; the final line is also 193 guaranteed to end with a line-feed, no matter the input`, 194 195 `lineup`: `lineup [size = 0] 196 Group lines into lines with up to n tab-separated items in them; when not 197 given a number, or when the number given is 0 or negative, all input lines 198 will end up in a single line of tab-separated items`, 199 200 `links`: `links 201 Find all web-like (HTTP/HTTPS) links found in each line, including multiple 202 links per line, emitting each match on its own output line`, 203 204 `lower`: `lower 205 Lowercase all symbols in all lines`, 206 207 `mime`: `mime 208 Try to auto-detect the MIME-type from the first few input bytes`, 209 210 `n`: `n [start = 1] 211 Number each line, using a tab to separate the number and the actual line-text 212 which follows it; when not given a number, the default is to start counting 213 lines from 1`, 214 215 `nj`: `nj 216 Nice JSON renders JSON data as indented ANSI-styled/colored output`, 217 218 `nn`: `nn [style = gray] 219 Restyle all numbers of at least 4 digits, using alternating ANSI-styles, to 220 make long numbers easier to parse visually; especially useful when dealing 221 with many such numbers at once; when not given a style name, the default is 222 to use a light gray color`, 223 224 `noempty`: `noempty 225 Ignore all empty lines`, 226 227 `nothing`: `nothing 228 Do nothing, which means all input is ignored, and no output is emitted`, 229 230 `now`: `now 231 Show the current date and time`, 232 233 `numbers`: `numbers 234 Show all 
valid numbers found in all lines, each match shown on its own line`, 235 236 `plain`: `plain 237 Ignore all ANSI-style sequences, making plain-text strictly plain/unstyled`, 238 239 `primes`: `primes [count] 240 Find the given number of prime numbers in increasing order, showing each 241 prime on its own output line`, 242 243 `prun`: `prun 244 Parallel RUN does what it says, where commands come from lines from the main 245 input`, 246 247 `range`: `range [start line] [stop line...] 248 Emit only the lines in the 1-based inclusive range given, ignoring all other 249 lines; when given only 1 number, the default is to emit all lines until the 250 end, once/if started`, 251 252 `realign`: `realign 253 Realign lines by padding "columns" with enough spaces`, 254 255 `reprose`: `reprose [max runes = 80] 256 Reflow lines of plain-text prose, trying to emit lines not wider than the 257 rune-count given, even if that's not always possible, depending on the input 258 lines being processed; when not given a rune-count, the default is 80 runes 259 max per line`, 260 261 `restyle`: `restyle [style] 262 Color/style each input line using ANSI-styles; each resulting line starts 263 with the style for the name given, and ends with a style-reset`, 264 265 `reuse`: `reuse [separator] [tools/args/seps...] 
266 Run multiple tools in sequence, reusing all input bytes for each tool run`, 267 268 `sbs`: `sbs [columns = 0] 269 Side By Side tries to lay out input lines as multiple aligned columns, so 270 that more lines of text can fit a single screen; when not given a number of 271 columns, or when given 0 or a negative number, this tool tries to fit as 272 many columns as possible on a 80-symbols-wide width-limit`, 273 274 `sha1`: `sha1 275 Encode/hash input bytes into SHA-1`, 276 277 `sha256`: `sha256 278 Encode/hash input bytes into SHA-256`, 279 280 `sha512`: `sha512 281 Encode/hash input bytes into SHA-512`, 282 283 `size`: `size 284 Count how many bytes the input has`, 285 286 `skip`: `skip [lines = 1] 287 Skip/ignore the first few/many lines, up to the number given; when not given 288 a number, the default is to skip the first line, if present`, 289 290 `skiplast`: `skiplast [lines = 1] 291 Skip/ignore the last few/many lines, up to the number given; when not given 292 a number, the default is to skip only the very last line`, 293 294 `soak`: `soak 295 Read all data to the last byte, before starting to copy everything to the 296 main output`, 297 298 `split`: `split [separators...] 
299 Split lines using any of the separators given, emitting each result on its 300 own line, using the earliest separator found on each step; when not given 301 any separators, items are split by 1 or more spaces and/or tabs`, 302 303 `splitany`: `splitany [separators] 304 Run the "split" tool using the symbols from the single argument given`, 305 306 `squeeze`: `squeeze 307 Trim lines very aggressively, which also means squishing runs of multiple 308 spaces into single ones, besides the usual space-trimming at both ends`, 309 310 `stomp`: `stomp 311 Turn runs of empty lines into single empty lines, effectively squeezing 312 paragraphs vertically, so to speak`, 313 314 `strings`: `strings 315 Find all ASCII-like runs of bytes from the input, emitting each match on 316 its own output line`, 317 318 `symbols`: `symbols [names] 319 Show unicode symbols matched by (common) name: each match is shown on its 320 own output line; when not given any name, name-value pairs are shown as 321 tab-separated lines`, 322 323 `tally`: `tally 324 Tally all unique lines, showing the reverse-sorted results at the end, 325 each line consisting of the tally count, a tab, and the line/value`, 326 327 `teletype`: `teletype 328 Simulate the output cadence of old teletype machines`, 329 330 `timer`: `timer [command...] [args...] 
331 Run a live timer, showing the time elapsed: any arguments are optional, but 332 when they're given, a subtask is run using those, ensuring its own stderr 333 doesn't clash with the timer info being constantly updated also on stderr`, 334 335 `title`: `title 336 Uppercase the first symbol in all lines, lowercasing all later symbols`, 337 338 `today`: `today 339 Show the current date`, 340 341 `tone`: `tone [seconds = 2] [frequency = 440] 342 Emit a simple sound tone in WAV format: the optional time is in seconds, the 343 optional frequency is in Hertz, both arguments allowing decimal values`, 344 345 `tools`: `tools 346 Show all tools from this app, along with the first line from their description 347 message; full descriptions for tools are available using the "help" tool`, 348 349 `topfiles`: `topfiles [folders...] 350 Find all top-level files in all the folders given; when not given any folder 351 name, the current folder is searched by default`, 352 353 `topfolders`: `topfolders [folders...] 354 Find all top-level folders in all the folders given; when not given any folder 355 name, the current folder is searched by default`, 356 357 `trim`: `trim 358 Ignore space-like symbols at both end of lines`, 359 360 `trimend`: `trimend 361 Ignore space-like symbols at the end of lines`, 362 363 `tsv`: `tsv 364 Emit each line's tab-separated item on its own line`, 365 366 `unique`: `unique 367 Avoid repeating any previous line`, 368 369 `urify`: `urify 370 URI-encode each input line`, 371 372 `utf8`: `utf8 373 Turn UTF-16-encoded text into UTF-8, or ignore leading UTF-8 BOM in UTF-8 374 text: this is one of the few text-oriented tools which keeps CRLF byte-pairs 375 as found, instead of turning them into single line-feeds`, 376 377 `wait`: `wait [seconds] [tool/args...] 
378 Wait the number of seconds given, before running an optional tool: the delay 379 given (in seconds) can be a round number, or a decimal number`, 380 } 381 382 func help(w writer, r io.Reader, args []string) error { 383 if len(args) == 0 { 384 w.WriteString(info) 385 386 w.WriteString("\n\nAliases\n\n") 387 388 maxw := 0 389 aliases := make([]string, 0, len(toolAliases)) 390 for name := range toolAliases { 391 aliases = append(aliases, name) 392 if w := utf8.RuneCountInString(name); maxw < w { 393 maxw = w 394 } 395 } 396 397 sort.SliceStable(aliases, func(i, j int) bool { 398 x := aliases[i] 399 y := aliases[j] 400 401 diff := strings.Compare(toolAliases[x], toolAliases[y]) 402 if diff != 0 { 403 return diff < 0 404 } 405 return strings.Compare(x, y) < 0 406 }) 407 408 for _, name := range aliases { 409 w.WriteString(` `) 410 w.WriteString(name) 411 writeSpaces(w, maxw-utf8.RuneCountInString(name)) 412 w.WriteString(` `) 413 w.WriteString(toolAliases[name]) 414 w.WriteByte('\n') 415 } 416 417 args = make([]string, 0, len(name2tool)) 418 for name := range name2tool { 419 args = append(args, name) 420 } 421 sort.Strings(args) 422 423 w.WriteString("\n\nTools Available\n\n\n") 424 } 425 426 for i, name := range args { 427 key, _ := dealiasToolName(name) 428 msg, ok := name2help[key] 429 if !ok { 430 return errors.New(`no tool named ` + name) 431 } 432 433 if i > 0 { 434 w.WriteByte('\n') 435 w.WriteByte('\n') 436 } 437 438 if err := showToolHelpMessage(w, msg); err != nil { 439 return err 440 } 441 } 442 443 return nil 444 } 445 446 func tools(w writer, r io.Reader, args []string) error { 447 keys := make([]string, 0, len(toolAliases)+len(name2tool)) 448 for alias := range toolAliases { 449 keys = append(keys, alias) 450 } 451 for name := range name2tool { 452 keys = append(keys, name) 453 } 454 455 sort.Strings(keys) 456 457 for _, k := range keys { 458 name := k 459 if s, ok := toolAliases[k]; ok { 460 name = s 461 } 462 463 help := name2help[name] 464 if i := 
strings.IndexByte(help, '\n'); i >= 0 { 465 help = help[:i] 466 } 467 468 w.WriteString(k) 469 w.WriteByte('\t') 470 w.WriteString(help) 471 472 if err := endLine(w); err != nil { 473 return err 474 } 475 } 476 477 return nil 478 } 479 480 func showToolHelpMessage(w writer, s string) error { 481 i := strings.IndexByte(s, '\n') 482 if i < 0 { 483 w.WriteString(s) 484 return endLine(w) 485 } 486 487 w.WriteString(s[:i]) 488 s = s[i+1:] 489 if err := endLine(w); err != nil { 490 return err 491 } 492 493 for len(s) > 0 { 494 line := s 495 i := strings.IndexByte(s, '\n') 496 if i >= 0 { 497 line = s[:i] 498 s = s[i+1:] 499 } else { 500 s = `` 501 } 502 503 if len(line) > 0 { 504 w.WriteString(` `) 505 } 506 w.WriteString(line) 507 508 if err := endLine(w); err != nil { 509 return err 510 } 511 } 512 513 return nil 514 } File: box/help_test.go 1 package main 2 3 import ( 4 "testing" 5 ) 6 7 func TestHelpNames(t *testing.T) { 8 for name := range name2help { 9 if _, ok := name2tool[name]; !ok { 10 t.Fatalf(`%s: not a tool name`, name) 11 return 12 } 13 } 14 15 for name := range name2tool { 16 if s, ok := name2help[name]; !ok || len(s) == 0 { 17 t.Fatalf(`%s: tool has no help message`, name) 18 return 19 } 20 } 21 } File: box/id3pictures.go 1 package main 2 3 import ( 4 "bufio" 5 "encoding/binary" 6 "errors" 7 "io" 8 "mime" 9 ) 10 11 // id3pic isolates the thumbnail bytes from the id3/mp3 stream given 12 func id3pic(w writer, r io.Reader) error { 13 _, err := pickID3Picture(w, r) 14 if err == io.EOF { 15 return errors.New(`no thumbnail found`) 16 } 17 return err 18 } 19 20 // pickID3Picture isolates the thumbnail bytes from the id3/mp3 stream given, 21 // also returning the detected MIME-type 22 func pickID3Picture(w io.Writer, r io.Reader) (mimetype string, err error) { 23 // http://www.unixgods.org/Ruby/ID3/docs/ID3_comparison.html 24 25 br := bufio.NewReader(r) 26 27 for { 28 b, err := br.ReadByte() 29 if err != nil { 30 return ``, err 31 } 32 33 switch b { 34 case 'A': 
// matchBytes is used by func pickID3Picture to check whether the next bytes
// from the reader are exactly the ones given: all matching bytes are read
// and stay read, while the first mismatching byte is put back; the result
// is true only when the whole byte-sequence was matched
func matchBytes(br *bufio.Reader, data []byte) (bool, error) {
	for _, want := range data {
		got, err := br.ReadByte()
		if err != nil {
			return false, err
		}

		if got != want {
			// let the caller see the mismatching byte again
			return false, br.UnreadByte()
		}
	}

	// an empty byte-sequence always matches, without reading anything
	return true, nil
}
errors.New(msg) 122 } 123 size -= uint32(len(comment)) 124 125 _, err = io.Copy(w, io.LimitReader(br, int64(size))) 126 if err != nil { 127 return mimeType, noMoreOutput{} 128 } 129 return mimeType, nil 130 } 131 132 // handlePIC is used by func id3Picture 133 func handlePIC(w io.Writer, br *bufio.Reader) (mimeType string, err error) { 134 // http://www.unixgods.org/Ruby/ID3/docs/id3v2-00.html#PIC 135 136 var buf [8]byte 137 n, err := br.Read(buf[:3]) 138 if err != nil || n != 3 { 139 const msg = `failed to detect thumbnail-payload size` 140 return ``, errors.New(msg) 141 } 142 143 // thumbnail-payload-size seems stored as 3 big-endian bytes 144 var size uint32 145 size += 256 * 256 * uint32(buf[0]) 146 size += 256 * uint32(buf[1]) 147 size += uint32(buf[2]) 148 149 // skip the text encoding 150 n, err = br.Read(buf[:5]) 151 if err != nil || n != 5 { 152 const msg = `failed to read thumbnail-payload type` 153 return ``, errors.New(msg) 154 } 155 156 // skip a null-delimited string 157 _, err = br.ReadString(0) 158 if err != nil { 159 const msg = `failed to read thumbnail-payload description` 160 return ``, errors.New(msg) 161 } 162 163 var ext [4]byte 164 ext[0] = '.' 165 ext[1] = buf[1] 166 ext[2] = buf[2] 167 ext[3] = buf[3] 168 169 // use made-up file-extension to detect MIME-type, then copy all 170 // thumbnail bytes 171 mimeType = mime.TypeByExtension(string(ext[:])) 172 _, err = io.Copy(w, io.LimitReader(br, int64(size))) 173 if err != nil { 174 return mimeType, noMoreOutput{} 175 } 176 return mimeType, nil 177 } 178 179 // getThumbnailTypeAPIC is used by func handleAPIC 180 func getThumbnailTypeAPIC(br *bufio.Reader) ([]byte, int, error) { 181 var kind []byte 182 n, err := meetBytes(br, []byte(`image/`)) 183 if err != nil { 184 return nil, n, err 185 } 186 187 kind = append(kind, `image/`...) 
// meetBytes is used by func getThumbnailTypeAPIC to skip right past the
// first occurrence of the byte-sequence given; it returns how many bytes
// were read, along with the read error which prevented a match, if any; an
// empty byte-sequence is matched right away, without reading anything
func meetBytes(br *bufio.Reader, data []byte) (int, error) {
	// fallback[k] is the length of the longest proper prefix of data[:k]
	// which is also a suffix of it: on a mismatch, this tells how much of
	// the input read so far can still be the start of a match, so that an
	// input like `iimage/` isn't missed when looking for `image/`
	fallback := make([]int, len(data)+1)
	for i, k := 1, 0; i < len(data); i++ {
		for k > 0 && data[i] != data[k] {
			k = fallback[k]
		}
		if data[i] == data[k] {
			k++
		}
		fallback[i+1] = k
	}

	n := 0
	matched := 0

	for matched < len(data) {
		b, err := br.ReadByte()
		if err != nil {
			return n, err
		}
		n++

		// shrink the partial match until the current byte extends it
		for matched > 0 && b != data[matched] {
			matched = fallback[matched]
		}
		if b == data[matched] {
			matched++
		}
	}

	return n, nil
}
// lineFlusher is a special buffered-writer which automatically flushes when
// given line-feeds, ensuring live-propagation of lines across tools, when
// these are run concurrently via internal `pipes`; when the write itself
// succeeds, a failed automatic flush is reported as the write's error, so
// callers can tell right away when no more output is possible
type lineFlusher struct {
	*bufio.Writer
}

// flushAfter flushes the buffer after a write whose error is the one given:
// the write's own error takes priority over the flush's error
func (lf lineFlusher) flushAfter(err error) error {
	if ferr := lf.Flush(); err == nil {
		return ferr
	}
	return err
}

// Write buffers the bytes given, flushing when any of them is a line-feed
func (lf lineFlusher) Write(s []byte) (size int, err error) {
	size, err = lf.Writer.Write(s)
	if bytes.IndexByte(s, '\n') >= 0 {
		err = lf.flushAfter(err)
	}
	return size, err
}

// WriteByte buffers the byte given, flushing when it's a line-feed
func (lf lineFlusher) WriteByte(b byte) error {
	err := lf.Writer.WriteByte(b)
	if b == '\n' {
		err = lf.flushAfter(err)
	}
	return err
}

// WriteRune buffers the rune given, flushing when it's a line-feed
func (lf lineFlusher) WriteRune(r rune) (size int, err error) {
	size, err = lf.Writer.WriteRune(r)
	if r == '\n' {
		err = lf.flushAfter(err)
	}
	return size, err
}

// WriteString buffers the string given, flushing when it has any line-feeds
func (lf lineFlusher) WriteString(s string) (size int, err error) {
	size, err = lf.Writer.WriteString(s)
	if strings.IndexByte(s, '\n') >= 0 {
		err = lf.flushAfter(err)
	}
	return size, err
}
// loopItems calls the func given for each item in the line given, along
// with the item's 0-based index: items are separated by spaces and/or tabs,
// and any whitespace around the line is ignored; looping stops at the first
// error the func returns, which is then returned
func loopItems(line []byte, how func(i int, s []byte) error) error {
	rest := bytes.TrimSpace(line)

	for n := 0; len(rest) > 0; n++ {
		cut := bytes.IndexAny(rest, " \t")
		if cut < 0 {
			// the last item runs until the end of the trimmed line
			return how(n, rest)
		}

		// figure out what follows the item before handing it over
		item, next := rest[:cut], bytes.TrimSpace(rest[cut+1:])
		if err := how(n, item); err != nil {
			return err
		}
		rest = next
	}

	return nil
}
// walktop calls the func given for each top-level entry of the folders
// given, handling each distinct folder name only once, and using the
// current folder when no names are given; it stops at the first error,
// whether that comes from reading a folder or from the func given
func walktop(args []string, how func(e fs.DirEntry) error) error {
	folders := args
	if len(folders) == 0 {
		folders = []string{`.`}
	}

	visited := make(map[string]bool)

	for _, folder := range folders {
		// ignore repeated names
		if visited[folder] {
			continue
		}
		visited[folder] = true

		entries, err := os.ReadDir(folder)
		if err != nil {
			return err
		}

		for _, entry := range entries {
			if err := how(entry); err != nil {
				return err
			}
		}
	}

	return nil
}
// these are all the errors which funcs/methods reading JSON/pseudo-JSON
// can result in
var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)

	// errNoMoreOutput is a generic dummy output-error, which is meant to be
	// ultimately ignored, being just an excuse to quit the app immediately
	// and successfully
	// errNoMoreOutput = errors.New(`no more output`)
)
'9': '9', 76 'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 77 'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F', 78 } 79 80 // escapedStringBytes helps func stringValue treat all string bytes quickly 81 // and correctly, using their officially-supported JSON escape sequences 82 // 83 // https://www.rfc-editor.org/rfc/rfc8259#section-7 84 var escapedStringBytes = [256][]byte{ 85 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 86 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 87 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 88 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 89 {'\\', 'b'}, {'\\', 't'}, 90 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 91 {'\\', 'f'}, {'\\', 'r'}, 92 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 93 {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 94 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 95 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 96 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 97 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 98 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 99 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 100 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 101 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 102 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 103 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 104 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 105 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 106 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 107 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 108 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 109 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 110 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 111 {112}, {113}, {114}, {115}, {116}, {117}, 
{118}, {119}, 112 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 113 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 114 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 115 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 116 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 117 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 118 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 119 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 120 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 121 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 122 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 123 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 124 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 125 {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231}, 126 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 127 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 128 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 129 } 130 131 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON, except for an 132 // extra/single line-feed at the end of the output 133 func json0(w writer, r io.Reader) error { 134 br := bufio.NewReader(r) 135 jr := jsonReader{br, 1, 1} 136 if err := jr.run(w.Writer); err != nil { 137 return err 138 } 139 return endLine(w) 140 } 141 142 // dejsonl converts lines, each with JSON/pseudo-JSON data, into a (valid) 143 // minimal JSON array 144 func dejsonl(w writer, r io.Reader) error { 145 nvalues := 0 146 147 err := loopLines(r, func(i int, line []byte) error { 148 line = bytes.TrimSpace(line) 149 if len(line) == 0 { 150 return nil 151 } 152 153 if i == 0 { 154 w.WriteByte('[') 155 } else { 156 w.WriteByte(',') 157 } 158 nvalues++ 159 160 br := bufio.NewReader(bytes.NewReader(line)) 161 jr := jsonReader{br, 1, 1} 162 // make errors refer to the right line number 163 jr.line = i + 1 164 return jr.run(w.Writer) 165 }) 166 167 if err != nil { 
168 return err 169 } 170 171 if nvalues > 0 { 172 w.WriteByte(']') 173 } 174 return endLine(w) 175 } 176 177 // jsonReader reads data via a buffer, keeping track of the input position: 178 // this in turn allows showing much more useful errors, when these happen 179 type jsonReader struct { 180 // r is the actual reader 181 r *bufio.Reader 182 183 // line is the 1-based line-counter for input bytes, and gives errors 184 // useful position info 185 line int 186 187 // pos is the 1-based `horizontal` position in its line, and gives 188 // errors useful position info 189 pos int 190 } 191 192 // improveError makes any error more useful, by giving it info about the 193 // current input-position, as a 1-based line/within-line-position pair 194 func (jr jsonReader) improveError(err error) error { 195 if _, ok := err.(linePosError); ok { 196 return err 197 } 198 199 if err == io.EOF { 200 return linePosError{jr.line, jr.pos, errInputEarlyEnd} 201 } 202 if err != nil { 203 return linePosError{jr.line, jr.pos, err} 204 } 205 return nil 206 } 207 208 // run does all the work for func json0, and each input line's work for func 209 // jsonl 210 func (jr *jsonReader) run(w *bufio.Writer) error { 211 // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order 212 // mark) gives no useful info if present, as UTF-8 leaves no ambiguity 213 // about byte-order by design 214 jr.skipUTF8BOM() 215 216 // ignore leading whitespace and/or comments 217 if err := jr.seekNext(); err != nil { 218 return err 219 } 220 221 // handle a single top-level JSON value 222 if err := jr.value(w); err != nil { 223 return err 224 } 225 226 // ignore trailing whitespace and/or comments 227 if err := jr.seekNext(); err != nil { 228 return err 229 } 230 231 // beyond trailing whitespace and/or comments, any more bytes 232 // make the whole input data invalid JSON 233 if _, ok := jr.peekByte(); ok { 234 return jr.improveError(errExtraBytes) 235 } 236 return nil 237 } 238 239 // demandSyntax 
fails with an error when the next byte isn't the one given; 240 // when it is, the byte is then read/skipped, and a nil error is returned 241 func (jr *jsonReader) demandSyntax(syntax byte) error { 242 chunk, err := jr.r.Peek(1) 243 if err == io.EOF { 244 return jr.improveError(errInputEarlyEnd) 245 } 246 if err != nil { 247 return jr.improveError(err) 248 } 249 250 if len(chunk) < 1 || chunk[0] != syntax { 251 msg := `expected ` + string(rune(syntax)) 252 return jr.improveError(errors.New(msg)) 253 } 254 255 jr.readByte() 256 return nil 257 } 258 259 // updatePosInfo does what it says, given the byte just read separately 260 func (jr *jsonReader) updatePosInfo(b byte) { 261 if b == '\n' { 262 jr.line += 1 263 jr.pos = 1 264 } else { 265 jr.pos++ 266 } 267 } 268 269 // peekByte simplifies control-flow for various other funcs 270 func (jr jsonReader) peekByte() (b byte, ok bool) { 271 chunk, err := jr.r.Peek(1) 272 if err == nil && len(chunk) >= 1 { 273 return chunk[0], true 274 } 275 return 0, false 276 } 277 278 // readByte does what it says, updating the reader's position info 279 func (jr *jsonReader) readByte() (b byte, err error) { 280 b, err = jr.r.ReadByte() 281 if err == nil { 282 jr.updatePosInfo(b) 283 return b, nil 284 } 285 return b, jr.improveError(err) 286 } 287 288 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols 289 // and comments, either single-line (starting with //) or general (starting 290 // with /* and ending with */) 291 func (jr *jsonReader) seekNext() error { 292 for { 293 b, ok := jr.peekByte() 294 if !ok { 295 return nil 296 } 297 298 // case ' ', '\t', '\f', '\v', '\r', '\n': 299 if b <= 32 { 300 // keep skipping whitespace bytes 301 b, _ := jr.readByte() 302 jr.updatePosInfo(b) 303 continue 304 } 305 306 if b != '/' { 307 // reached the next token 308 return nil 309 } 310 311 if err := jr.skipComment(); err != nil { 312 return err 313 } 314 315 // after comments, keep looking for more whitespace and/or 
comments 316 } 317 } 318 319 // skipComment helps func seekNext skip over comments, simplifying the latter 320 // func's control-flow 321 func (jr *jsonReader) skipComment() error { 322 err := jr.demandSyntax('/') 323 if err != nil { 324 return err 325 } 326 327 b, ok := jr.peekByte() 328 if !ok { 329 return jr.improveError(errInputEarlyEnd) 330 } 331 332 switch b { 333 case '/': 334 // handle single-line comments 335 return jr.skipLine() 336 337 case '*': 338 // handle (potentially) multi-line comments 339 return jr.skipGeneralComment() 340 341 default: 342 return jr.improveError(errInvalidComment) 343 } 344 } 345 346 // skipLine handles single-line comments for func skipComment 347 func (jr *jsonReader) skipLine() error { 348 for { 349 b, err := jr.r.ReadByte() 350 if err == io.EOF { 351 // end of input is fine in this case 352 return nil 353 } 354 if err != nil { 355 return err 356 } 357 358 jr.updatePosInfo(b) 359 if b == '\n' { 360 jr.line++ 361 return nil 362 } 363 } 364 } 365 366 // skipGeneralComment handles (potentially) multi-line comments for func 367 // skipComment 368 func (jr *jsonReader) skipGeneralComment() error { 369 var prev byte 370 for { 371 b, err := jr.readByte() 372 if err != nil { 373 return jr.improveError(errCommentEarlyEnd) 374 } 375 376 if prev == '*' && b == '/' { 377 return nil 378 } 379 if b == '\n' { 380 jr.line++ 381 } 382 prev = b 383 } 384 } 385 386 // skipUTF8BOM does what it says, if a UTF-8 BOM is present 387 func (jr *jsonReader) skipUTF8BOM() { 388 lead, err := jr.r.Peek(3) 389 if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) { 390 jr.readByte() 391 jr.readByte() 392 jr.readByte() 393 jr.pos += 3 394 } 395 } 396 397 // outputByte is a small wrapper on func WriteByte, which adapts any error 398 // into a custom dummy output-error, which is in turn meant to be ignored, 399 // being just an excuse to quit the app immediately and successfully 400 func outputByte(w *bufio.Writer, b byte) error { 401 err := 
w.WriteByte(b) 402 if err == nil { 403 return nil 404 } 405 return noMoreOutput{} 406 } 407 408 // array handles arrays for func value 409 func (jr *jsonReader) array(w *bufio.Writer) error { 410 if err := jr.demandSyntax('['); err != nil { 411 return err 412 } 413 w.WriteByte('[') 414 415 for n := 0; true; n++ { 416 // there may be whitespace/comments before the next comma 417 if err := jr.seekNext(); err != nil { 418 return err 419 } 420 421 // handle commas between values, as well as trailing ones 422 comma := false 423 b, _ := jr.peekByte() 424 if b == ',' { 425 jr.readByte() 426 comma = true 427 428 // there may be whitespace/comments before an ending ']' 429 if err := jr.seekNext(); err != nil { 430 return err 431 } 432 b, _ = jr.peekByte() 433 } 434 435 // handle end of array 436 if b == ']' { 437 jr.readByte() 438 w.WriteByte(']') 439 return nil 440 } 441 442 // don't forget commas between adjacent values 443 if n > 0 { 444 if !comma { 445 return errNoArrayComma 446 } 447 if err := outputByte(w, ','); err != nil { 448 return err 449 } 450 } 451 452 // handle the next value 453 if err := jr.seekNext(); err != nil { 454 return err 455 } 456 if err := jr.value(w); err != nil { 457 return err 458 } 459 } 460 461 // make the compiler happy 462 return nil 463 } 464 465 // digits helps various number-handling funcs do their job 466 func (jr *jsonReader) digits(w *bufio.Writer) error { 467 for n := 0; true; n++ { 468 b, _ := jr.peekByte() 469 470 // support `nice` long numbers by ignoring their underscores 471 if b == '_' { 472 jr.readByte() 473 continue 474 } 475 476 if '0' <= b && b <= '9' { 477 jr.readByte() 478 w.WriteByte(b) 479 continue 480 } 481 482 if n == 0 { 483 return errNoDigits 484 } 485 return nil 486 } 487 488 // make the compiler happy 489 return nil 490 } 491 492 // dot handles pseudo-JSON numbers which start with a decimal dot 493 func (jr *jsonReader) dot(w *bufio.Writer) error { 494 if err := jr.demandSyntax('.'); err != nil { 495 return err 496 
} 497 w.Write([]byte{'0', '.'}) 498 return jr.digits(w) 499 } 500 501 // key is used by func object and generalizes func stringValue, by allowing 502 // unquoted object keys; it's not used anywhere else, as allowing unquoted 503 // string values is ambiguous with actual JSON-keyword values null, false, and 504 // true 505 func (jr *jsonReader) key(w *bufio.Writer) error { 506 quote, ok := jr.peekByte() 507 if quote == '"' || quote == '\'' { 508 return jr.stringValue(w) 509 } 510 if !ok { 511 return jr.improveError(errStringEarlyEnd) 512 } 513 514 w.WriteByte('"') 515 for { 516 if b, _ := jr.peekByte(); isIdentifier[b] { 517 jr.readByte() 518 w.WriteByte(b) 519 continue 520 } 521 522 w.WriteByte('"') 523 return nil 524 } 525 } 526 527 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes 528 // are unescaped ASCII symbols: this is a very common case for strings, and is 529 // almost always the case for object keys; returns whether it succeeded, so 530 // this func's caller knows knows if it needs to do anything, the slower way 531 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) { 532 chunk, _ := jr.r.Peek(64) 533 534 for i, b := range chunk { 535 if b < 32 || b > 127 || b == '\\' { 536 return false 537 } 538 if b != quote { 539 continue 540 } 541 542 // bulk-writing the chunk is this func's whole point 543 w.WriteByte('"') 544 w.Write(chunk[:i]) 545 w.WriteByte('"') 546 547 jr.r.Discard(i + 1) 548 return true 549 } 550 551 // maybe the inner-string is ok, but it's just longer than the chunk 552 return false 553 } 554 555 // keyword demands the exact keyword/string given to it 556 func (jr *jsonReader) keyword(w *bufio.Writer, kw []byte) error { 557 for rest := kw; len(rest) > 0; rest = rest[1:] { 558 b, err := jr.readByte() 559 if err == nil && b == rest[0] { 560 // keywords given to this func have no line-feeds 561 jr.pos++ 562 continue 563 } 564 565 msg := `expected JSON value ` + string(kw) 566 return 
jr.improveError(errors.New(msg)) 567 } 568 569 w.Write(kw) 570 return nil 571 } 572 573 // negative handles numbers starting with a negative sign for func value 574 func (jr *jsonReader) negative(w *bufio.Writer) error { 575 if err := jr.demandSyntax('-'); err != nil { 576 return err 577 } 578 579 w.WriteByte('-') 580 if b, _ := jr.peekByte(); b == '.' { 581 jr.readByte() 582 w.Write([]byte{'0', '.'}) 583 return jr.digits(w) 584 } 585 return jr.number(w) 586 } 587 588 // number handles numeric values/tokens, including invalid-JSON cases, such 589 // as values starting with a decimal dot 590 func (jr *jsonReader) number(w *bufio.Writer) error { 591 // handle integer digits 592 if err := jr.digits(w); err != nil { 593 return err 594 } 595 596 // handle optional decimal digits, starting with a leading dot 597 if b, _ := jr.peekByte(); b == '.' { 598 jr.readByte() 599 w.WriteByte('.') 600 return jr.digits(w) 601 } 602 return nil 603 } 604 605 // object handles objects for func value 606 func (jr *jsonReader) object(w *bufio.Writer) error { 607 if err := jr.demandSyntax('{'); err != nil { 608 return err 609 } 610 w.WriteByte('{') 611 612 for npairs := 0; true; npairs++ { 613 // there may be whitespace/comments before the next comma 614 if err := jr.seekNext(); err != nil { 615 return err 616 } 617 618 // handle commas between key-value pairs, as well as trailing ones 619 comma := false 620 b, _ := jr.peekByte() 621 if b == ',' { 622 jr.readByte() 623 comma = true 624 625 // there may be whitespace/comments before an ending '}' 626 if err := jr.seekNext(); err != nil { 627 return err 628 } 629 b, _ = jr.peekByte() 630 } 631 632 // handle end of object 633 if b == '}' { 634 jr.readByte() 635 w.WriteByte('}') 636 return nil 637 } 638 639 // don't forget commas between adjacent key-value pairs 640 if npairs > 0 { 641 if !comma { 642 return errNoObjectComma 643 } 644 if err := outputByte(w, ','); err != nil { 645 return err 646 } 647 } 648 649 // handle the next pair's key 
650 if err := jr.seekNext(); err != nil { 651 return err 652 } 653 if err := jr.key(w); err != nil { 654 return err 655 } 656 657 // demand a colon right after the key 658 if err := jr.seekNext(); err != nil { 659 return err 660 } 661 if err := jr.demandSyntax(':'); err != nil { 662 return err 663 } 664 w.WriteByte(':') 665 666 // handle the next pair's value 667 if err := jr.seekNext(); err != nil { 668 return err 669 } 670 if err := jr.value(w); err != nil { 671 return err 672 } 673 } 674 675 // make the compiler happy 676 return nil 677 } 678 679 // positive handles numbers starting with a positive sign for func value 680 func (jr *jsonReader) positive(w *bufio.Writer) error { 681 if err := jr.demandSyntax('+'); err != nil { 682 return err 683 } 684 685 // valid JSON isn't supposed to have leading pluses on numbers, so 686 // emit nothing for it, unlike for negative numbers 687 688 if b, _ := jr.peekByte(); b == '.' { 689 jr.readByte() 690 w.Write([]byte{'0', '.'}) 691 return jr.digits(w) 692 } 693 return jr.number(w) 694 } 695 696 // stringValue handles strings for funcs value and key, and supports both 697 // single-quotes and double-quotes, always emitting the latter in the output, 698 // of course 699 func (jr *jsonReader) stringValue(w *bufio.Writer) error { 700 quote, ok := jr.peekByte() 701 if !ok || (quote != '"' && quote != '\'') { 702 return errNoStringQuote 703 } 704 705 jr.readByte() 706 // try the quicker all-unescaped-ASCII handler 707 if trySimpleInner(w, jr, quote) { 708 return nil 709 } 710 711 // it's a non-trivial inner-string, so handle it byte-by-byte 712 w.WriteByte('"') 713 escaped := false 714 715 for { 716 b, err := jr.r.ReadByte() 717 if err != nil { 718 if err == io.EOF { 719 return jr.improveError(errStringEarlyEnd) 720 } 721 return jr.improveError(err) 722 } 723 724 if !escaped { 725 if b == '\\' { 726 escaped = true 727 continue 728 } 729 730 // handle end of string 731 if b == quote { 732 return outputByte(w, '"') 733 } 734 735 
w.Write(escapedStringBytes[b]) 736 jr.updatePosInfo(b) 737 continue 738 } 739 740 // handle escaped items 741 escaped = false 742 743 switch b { 744 case 'u': 745 // \u needs exactly 4 hex-digits to follow it 746 w.Write([]byte{'\\', 'u'}) 747 if err := copyHex(w, 4, jr); err != nil { 748 return jr.improveError(err) 749 } 750 751 case 'x': 752 // JSON only supports 4 escaped hex-digits, so pad the 2 753 // expected hex-digits with 2 zeros 754 w.Write([]byte{'\\', 'u', '0', '0'}) 755 if err := copyHex(w, 2, jr); err != nil { 756 return jr.improveError(err) 757 } 758 759 case 't', 'f', 'r', 'n', 'b', '\\', '"': 760 // handle valid-JSON escaped string sequences 761 w.WriteByte('\\') 762 w.WriteByte(b) 763 764 // case '\'': 765 // // escaped single-quotes aren't standard JSON, but they can 766 // // be handy when the input uses non-standard single-quoted 767 // // strings 768 // w.WriteByte('\'') 769 770 default: 771 // return jr.decorateError(unexpectedByte{b}) 772 w.Write(escapedStringBytes[b]) 773 } 774 } 775 } 776 777 // copyHex handles a run of hex-digits for func stringValue, starting right 778 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its 779 // errors with position info: that's up to the caller 780 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error { 781 for i := 0; i < n; i++ { 782 b, err := jr.r.ReadByte() 783 if err == io.EOF { 784 return errStringEarlyEnd 785 } 786 if err != nil { 787 return err 788 } 789 790 jr.updatePosInfo(b) 791 792 if b := matchHex[b]; b != 0 { 793 w.WriteByte(b) 794 continue 795 } 796 797 return errInvalidHex 798 } 799 800 return nil 801 } 802 803 // value is a generic JSON-token/value handler, which allows the recursive 804 // behavior to handle any kind of JSON/pseudo-JSON input 805 func (jr *jsonReader) value(w *bufio.Writer) error { 806 chunk, err := jr.r.Peek(1) 807 if err == nil && len(chunk) >= 1 { 808 return jr.dispatch(w, chunk[0]) 809 } 810 811 if err == io.EOF { 812 return 
jr.improveError(errInputEarlyEnd) 813 } 814 return jr.improveError(errInputEarlyEnd) 815 } 816 817 // dispatch simplifies control-flow for func value 818 func (jr *jsonReader) dispatch(w *bufio.Writer, b byte) error { 819 switch b { 820 case 'f': 821 return jr.keyword(w, []byte{'f', 'a', 'l', 's', 'e'}) 822 case 'n': 823 return jr.keyword(w, []byte{'n', 'u', 'l', 'l'}) 824 case 't': 825 return jr.keyword(w, []byte{'t', 'r', 'u', 'e'}) 826 case '.': 827 return jr.dot(w) 828 case '+': 829 return jr.positive(w) 830 case '-': 831 return jr.negative(w) 832 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 833 return jr.number(w) 834 case '\'', '"': 835 return jr.stringValue(w) 836 case '[': 837 return jr.array(w) 838 case '{': 839 return jr.object(w) 840 default: 841 return jr.improveError(errInvalidToken) 842 } 843 } File: box/json0_test.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 "strings" 8 "testing" 9 ) 10 11 func TestJSON0(t *testing.T) { 12 var tests = []struct { 13 Input string 14 Expected string 15 }{ 16 {`false`, `false`}, 17 {`null`, `null`}, 18 {` true `, `true`}, 19 20 {`0`, `0`}, 21 {`1`, `1`}, 22 {`2`, `2`}, 23 {`3`, `3`}, 24 {`4`, `4`}, 25 {`5`, `5`}, 26 {`6`, `6`}, 27 {`7`, `7`}, 28 {`8`, `8`}, 29 {`9`, `9`}, 30 31 {` .345`, `0.345`}, 32 {` -.345`, `-0.345`}, 33 {` +.345`, `0.345`}, 34 {` +123.345`, `123.345`}, 35 {` +.345`, `0.345`}, 36 {` 123.34523`, `123.34523`}, 37 {` 123.34_523`, `123.34523`}, 38 {` 123_456.123`, `123456.123`}, 39 40 {`""`, `""`}, 41 {`''`, `""`}, 42 {`"\""`, `"\""`}, 43 {`'\"'`, `"\""`}, 44 {`'\''`, `"'"`}, 45 {`'abc\u0e9A'`, `"abc\u0E9A"`}, 46 {`'abc\x1f[0m'`, `"abc\u001F[0m"`}, 47 48 {`[ ]`, `[]`}, 49 {`[ , ]`, `[]`}, 50 {`[.345, false,null , ]`, `[0.345,false,null]`}, 51 52 {`{ }`, `{}`}, 53 {`{ , }`, `{}`}, 54 55 { 56 `{ 'abc': .345, "def" : false, 'xyz':null , }`, 57 `{"abc":0.345,"def":false,"xyz":null}`, 58 }, 59 60 {`{0problems:123,}`, `{"0problems":123}`}, 61 {`{0_problems:123}`, `{"0_problems":123}`}, 
62 } 63 64 for _, tc := range tests { 65 t.Run(tc.Input, func(t *testing.T) { 66 var out strings.Builder 67 w := lineFlusher{bufio.NewWriter(&out)} 68 r := bufio.NewReader(strings.NewReader(tc.Input)) 69 if err := json0(w, r); err != nil && err != io.EOF { 70 t.Fatal(err) 71 return 72 } 73 // don't forget to flush the buffer, or output may stay empty 74 w.Flush() 75 76 s := out.String() 77 s = strings.TrimSuffix(s, "\n") 78 if s != tc.Expected { 79 t.Fatalf("<got>\n%s\n<expected>\n%s", s, tc.Expected) 80 return 81 } 82 }) 83 } 84 } 85 86 func TestEscapedStringBytes(t *testing.T) { 87 var escaped = map[rune][]byte{ 88 '\x00': {'\\', 'u', '0', '0', '0', '0'}, 89 '\x01': {'\\', 'u', '0', '0', '0', '1'}, 90 '\x02': {'\\', 'u', '0', '0', '0', '2'}, 91 '\x03': {'\\', 'u', '0', '0', '0', '3'}, 92 '\x04': {'\\', 'u', '0', '0', '0', '4'}, 93 '\x05': {'\\', 'u', '0', '0', '0', '5'}, 94 '\x06': {'\\', 'u', '0', '0', '0', '6'}, 95 '\x07': {'\\', 'u', '0', '0', '0', '7'}, 96 '\x0b': {'\\', 'u', '0', '0', '0', 'b'}, 97 '\x0e': {'\\', 'u', '0', '0', '0', 'e'}, 98 '\x0f': {'\\', 'u', '0', '0', '0', 'f'}, 99 '\x10': {'\\', 'u', '0', '0', '1', '0'}, 100 '\x11': {'\\', 'u', '0', '0', '1', '1'}, 101 '\x12': {'\\', 'u', '0', '0', '1', '2'}, 102 '\x13': {'\\', 'u', '0', '0', '1', '3'}, 103 '\x14': {'\\', 'u', '0', '0', '1', '4'}, 104 '\x15': {'\\', 'u', '0', '0', '1', '5'}, 105 '\x16': {'\\', 'u', '0', '0', '1', '6'}, 106 '\x17': {'\\', 'u', '0', '0', '1', '7'}, 107 '\x18': {'\\', 'u', '0', '0', '1', '8'}, 108 '\x19': {'\\', 'u', '0', '0', '1', '9'}, 109 '\x1a': {'\\', 'u', '0', '0', '1', 'a'}, 110 '\x1b': {'\\', 'u', '0', '0', '1', 'b'}, 111 '\x1c': {'\\', 'u', '0', '0', '1', 'c'}, 112 '\x1d': {'\\', 'u', '0', '0', '1', 'd'}, 113 '\x1e': {'\\', 'u', '0', '0', '1', 'e'}, 114 '\x1f': {'\\', 'u', '0', '0', '1', 'f'}, 115 116 '\t': {'\\', 't'}, 117 '\f': {'\\', 'f'}, 118 '\b': {'\\', 'b'}, 119 '\r': {'\\', 'r'}, 120 '\n': {'\\', 'n'}, 121 '\\': {'\\', '\\'}, 122 '"': {'\\', '"'}, 123 } 124 
125 if n := len(escapedStringBytes); n != 256 { 126 t.Fatalf(`expected 256 entries, instead of %d`, n) 127 return 128 } 129 130 for i, v := range escapedStringBytes { 131 exp := []byte{byte(i)} 132 if esc, ok := escaped[rune(i)]; ok { 133 exp = esc 134 } 135 136 if !bytes.Equal(v, exp) { 137 t.Fatalf("%d: expected %#v, got %#v", i, exp, v) 138 return 139 } 140 } 141 } File: box/main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "math" 8 "os" 9 "strconv" 10 "strings" 11 12 _ "embed" 13 ) 14 15 //go:embed info.txt 16 var info string 17 18 const ( 19 kb = 1024 20 mb = 1024 * kb 21 gb = 1024 * mb 22 23 // bufferSize seems a sensible default for modern cache-sizes 24 bufferSize = 16 * kb 25 26 // maxLineSize limits how long input lines can be byte-count-wise 27 maxLineSize = 8 * gb 28 29 // defaultTabstop is the space-count used for tab-expansion across tools 30 defaultTabstop = 4 31 32 // columnSeparator is the string put between adjacent columns; to avoid 33 // trailing spaces on output lines whose latter line is empty/missing, 34 // a right-trimmed version of this string is used in those output lines 35 columnSeparator = ` █ ` 36 37 // maxAutoWidth is the output max-width used by a few `column-fitting` 38 // tools when run in `automatic mode`, and is chosen to fit very old 39 // monitors 40 maxAutoWidth = 79 41 ) 42 43 var toolAliases = map[string]string{ 44 `absname`: `abs`, 45 `allfiles`: `files`, 46 `allfolders`: `folders`, 47 `arguments`: `args`, 48 `automime`: `mime`, 49 `basename`: `base`, 50 `blow`: `detab`, 51 `blowtabs`: `detab`, 52 `breathe`: `bl`, 53 `butfinal`: `skiplast`, 54 `butlast`: `skiplast`, 55 `chain`: `compose`, 56 `color`: `restyle`, 57 `colorjson`: `nj`, 58 `copy`: `identity`, 59 `countbytes`: `coby`, 60 `debz2`: `debz`, 61 `debzip`: `debz`, 62 `debzip2`: `debz`, 63 `dedup`: `unique`, 64 `deempty`: `noempty`, 65 `degzip`: `degz`, 66 `dempty`: `noempty`, 67 `dirname`: `dir`, 68 `expand`: `detab`, 69 `expandtabs`: `detab`, 70 
`fancyjson`: `nj`, 71 `fancynumbers`: `nn`, 72 `fancynums`: `nn`, 73 `final`: `last`, 74 `fixjson`: `json0`, 75 `folder`: `dir`, 76 `foldername`: `dir`, 77 `guessmime`: `mime`, 78 `guesstype`: `mime`, 79 `guessfiletype`: `mime`, 80 `gzip`: `gz`, 81 `hexify`: `hex`, 82 `id3image`: `id3pic`, 83 `id3img`: `id3pic`, 84 `id3picture`: `id3pic`, 85 `id3thumb`: `id3pic`, 86 `id3thumbnail`: `id3pic`, 87 `idem`: `identity`, 88 `iden`: `identity`, 89 `ignore`: `drop`, 90 `j0`: `json0`, 91 `mp3image`: `id3pic`, 92 `mp3img`: `id3pic`, 93 `mp3pic`: `id3pic`, 94 `mp3picture`: `id3pic`, 95 `mp3thumb`: `id3pic`, 96 `mp3thumbnail`: `id3pic`, 97 `nicejson`: `nj`, 98 `nicenumbers`: `nn`, 99 `nicenums`: `nn`, 100 `nil`: `nothing`, 101 `null`: `nothing`, 102 `parentname`: `dir`, 103 `parun`: `prun`, 104 `pipe`: `compose`, 105 `reflow`: `reprose`, 106 `rstrip`: `trimend`, 107 `sidebyside`: `sbs`, 108 `skipfinal`: `skiplast`, 109 `skipfirst`: `skip`, 110 `sponge`: `soak`, 111 `strip`: `trim`, 112 `stripend`: `trimend`, 113 `striptrail`: `trimend`, 114 `striptrails`: `trimend`, 115 `style`: `restyle`, 116 `sym`: `symbols`, 117 `symbol`: `symbols`, 118 `trimtrail`: `trimend`, 119 `trimtrails`: `trimend`, 120 `trunc`: `limit`, 121 `truncate`: `limit`, 122 `unbase64`: `debase64`, 123 `unbz`: `debz`, 124 `unbz2`: `debz`, 125 `unbzip`: `debz`, 126 `unbzip2`: `debz`, 127 `uncsv`: `decsv`, 128 `unempty`: `noempty`, 129 `unflac`: `deflac`, 130 `ungz`: `degz`, 131 `ungzip`: `degz`, 132 `unjsonl`: `dejsonl`, 133 `untab`: `detab`, 134 `uriencode`: `urify`, 135 `words`: `items`, 136 } 137 138 var name2tool = map[string]any{ 139 `abs`: abs, 140 `args`: args, 141 `base64`: base64encode, 142 `base`: base, 143 `begin`: begin, 144 `begintsv`: beginTSV, 145 `bh`: breatheHeader, 146 `bl`: breatheLines, 147 `book`: book, 148 `bytefreq`: byteFreq, 149 `choplf`: choplf, 150 `coby`: coby, 151 `compose`: nil, 152 `datauri`: datauri, 153 `debase64`: debase64, 154 `debz`: debz, 155 `decsv`: decsv, 156 `deflac`: 
deflac, 157 `degz`: degz, 158 `dejsonl`: dejsonl, 159 `delay`: delay, 160 `detab`: detab, 161 `dir`: dir, 162 `div`: div, 163 `drop`: drop, 164 `end`: end, 165 `endtsv`: endTSV, 166 `files`: files, 167 `folders`: folders, 168 `first`: first, 169 `gz`: gz, 170 `help`: nil, 171 `hex`: hexify, 172 `hold`: hold, 173 `id3pic`: id3pic, 174 `identity`: identity, 175 `indent`: indent, 176 `items`: items, 177 `join`: join, 178 `json0`: json0, 179 `jsonl`: jsonl, 180 `junk`: junk, 181 `last`: last, 182 `leak`: leak, 183 `limit`: limit, 184 `lines`: lines, 185 `lineup`: lineup, 186 `links`: links, 187 `lower`: lower, 188 `mime`: mimeDetect, 189 `n`: n, 190 `nj`: nj, 191 `nn`: nn, 192 `noempty`: noEmpty, 193 `nothing`: nothing, 194 `now`: now, 195 `numbers`: numbers, 196 `plain`: plain, 197 `primes`: primes, 198 `prun`: prun, 199 `range`: rangeLines, 200 `realign`: realign, 201 `reprose`: reprose, 202 `restyle`: restyle, 203 `reuse`: nil, 204 `sbs`: sbs, 205 `sha1`: sha1encode, 206 `sha256`: sha256encode, 207 `sha512`: sha512encode, 208 `size`: size, 209 `skip`: skip, 210 `skiplast`: skipLast, 211 `soak`: soak, 212 `split`: split, 213 `splitany`: splitAny, 214 `squeeze`: squeeze, 215 `stomp`: stomp, 216 `strings`: stringsTool, 217 `symbols`: symbols, 218 `tally`: tally, 219 `teletype`: teletype, 220 `timer`: timer, 221 `title`: title, 222 `today`: today, 223 `tone`: tone, 224 `tools`: nil, 225 `topfiles`: topfiles, 226 `topfolders`: topfolders, 227 `trim`: trim, 228 `trimend`: trimend, 229 `tsv`: tsv, 230 `unique`: unique, 231 `urify`: urify, 232 `utf8`: utf8Tool, 233 `wait`: nil, 234 } 235 236 var name2metaTool = map[string]any{ 237 `compose`: compose, 238 `help`: help, 239 `reuse`: reuse, 240 `tools`: tools, 241 `wait`: wait, 242 } 243 244 // justQuit is a custom error-type which isn't for showing, but quitting a 245 // tool right away instead 246 type justQuit struct { 247 exitCode int 248 } 249 250 // Error is only to satisfy the error interface, and not for showing 251 
func (justQuit) Error() string { 252 return `` 253 } 254 255 // noMoreOutput is a custom error-type to allow the app to quit immediately 256 // without complaining, treating a closed stdout as a non-error condition 257 type noMoreOutput struct{} 258 259 func (nmo noMoreOutput) Error() string { 260 return `no more output` 261 } 262 263 // wrongToolArgs is a custom error-type to allow the app to offer a tool's 264 // help/description message if the tool is `misrun` by giving it the wrong 265 // number and/or type of arguments 266 type wrongToolArgs struct { 267 problem string 268 tool string 269 } 270 271 func (wta wrongToolArgs) Error() string { 272 if wta.tool == `` { 273 return wta.problem 274 } 275 return wta.tool + `: ` + wta.problem 276 } 277 278 // unsupportedTool is a custom error-type to automate tool-table testing 279 type unsupportedTool struct{} 280 281 func (ut unsupportedTool) Error() string { 282 return `unsupported tool type` 283 } 284 285 // multipleErrors is a custom error-type to avoid showing further errors 286 type multipleErrors struct{} 287 288 func (me multipleErrors) Error() string { 289 return `multiple errors happened` 290 } 291 292 // fill lookup-table with items which the compiler would otherwise forbid due 293 // to circular intialization cycles 294 func init() { 295 for name, tool := range name2metaTool { 296 name2tool[name] = tool 297 } 298 } 299 300 func main() { 301 if len(os.Args) < 2 { 302 os.Stderr.WriteString(info) 303 os.Exit(0) 304 } 305 306 switch os.Args[1] { 307 case `-h`, `--h`, `-help`, `--help`: 308 os.Stderr.WriteString(info) 309 os.Exit(0) 310 } 311 312 err := run(os.Stdout, os.Stdin, os.Args[1:]) 313 if err == nil { 314 return 315 } 316 if _, ok := err.(noMoreOutput); ok { 317 return 318 } 319 if _, ok := err.(multipleErrors); ok { 320 return 321 } 322 323 showError(err) 324 325 // offer a helpful tool-description message, if a tool was `misrun` 326 if err, ok := err.(wrongToolArgs); ok { 327 run(os.Stdout, os.Stdin, 
[]string{`help`, err.tool}) 328 } 329 330 os.Exit(1) 331 } 332 333 func showError(err error) { 334 if err == nil { 335 return 336 } 337 338 os.Stderr.WriteString("\x1b[31m") 339 os.Stderr.WriteString(err.Error()) 340 os.Stderr.WriteString("\x1b[0m\n") 341 } 342 343 func run(w io.Writer, r io.Reader, args []string) error { 344 err := dispatch(w, r, args) 345 if err == nil { 346 return nil 347 } 348 349 if _, ok := err.(noMoreOutput); ok { 350 return err 351 } 352 if _, ok := err.(multipleErrors); ok { 353 return err 354 } 355 356 if quit, ok := err.(justQuit); ok { 357 if quit.exitCode != 0 { 358 os.Exit(quit.exitCode) 359 } 360 return nil 361 } 362 363 if len(args) == 0 { 364 return err 365 } 366 367 name := strings.TrimSpace(args[0]) 368 if err, ok := err.(wrongToolArgs); ok && err.tool == `` { 369 return wrongToolArgs{err.problem, name} 370 } 371 return errors.New(name + `: ` + err.Error()) 372 } 373 374 func dealiasToolName(name string) (resolved string, ok bool) { 375 key := strings.TrimSpace(name) 376 key = strings.ReplaceAll(key, `-`, ``) 377 key = strings.ReplaceAll(key, `_`, ``) 378 if s, ok := toolAliases[key]; ok { 379 return s, true 380 } 381 return key, false 382 } 383 384 func dispatch(w io.Writer, r io.Reader, args []string) error { 385 if len(args) == 0 { 386 return errors.New(`no tool name given`) 387 } 388 389 name := strings.TrimSpace(args[0]) 390 switch name { 391 case `-`, `--`, `+`, `/`, `.`, `,`, `:`: 392 // enable special shortcuts for the `compose` tool 393 sep := name 394 cmds := splitSliceNonEmpty(args[1:], sep) 395 return composeAsyncRec(w, r, cmds) 396 } 397 398 key, _ := dealiasToolName(name) 399 args = args[1:] 400 401 bw := lineFlusher{bufio.NewWriter(w)} 402 defer bw.Flush() 403 404 tool, ok := name2tool[key] 405 if !ok { 406 if _, ok := styles[key]; ok { 407 return restyle(bw, r, []string{key}) 408 } 409 return errors.New(`no such tool available`) 410 } 411 412 switch tool := tool.(type) { 413 case func(w writer, r io.Reader) error: 
414 if len(args) != 0 { 415 return errors.New(`no args expected`) 416 } 417 return tool(bw, r) 418 419 case func(w writer, r io.Reader, arg string) error: 420 arg, err := requireString(args) 421 if err != nil { 422 return err 423 } 424 return tool(bw, r, arg) 425 426 case func(w writer, r io.Reader, n int) error: 427 n, err := requireInteger(args) 428 if err != nil { 429 return err 430 } 431 return tool(bw, r, n) 432 433 case func(w writer, r io.Reader, f float64) error: 434 f, err := requireNumber(args) 435 if err != nil { 436 return err 437 } 438 return tool(bw, r, f) 439 440 case func(w writer, r io.Reader, args []string) error: 441 return tool(bw, r, args) 442 443 case func(w writer, i int, line []byte) error: 444 return loopLines(r, func(i int, line []byte) error { 445 return tool(bw, i, line) 446 }) 447 448 default: 449 // this enables the automated testing of the tool-lookup table 450 return unsupportedTool{} 451 } 452 } 453 454 func optionalInteger(args []string, fallback int) (int, error) { 455 if len(args) == 0 { 456 return fallback, nil 457 } 458 459 if len(args) == 1 { 460 n, err := parseInteger(args[0]) 461 if err != nil { 462 return n, wrongToolArgs{err.Error(), ``} 463 } 464 return n, nil 465 } 466 467 n := strconv.Itoa(len(args)) 468 m := `expected at most 1 integer-like argument, instead of ` + n + ` args` 469 return 0, wrongToolArgs{m, ``} 470 } 471 472 func optionalString(args []string, fallback string) (string, error) { 473 if len(args) == 0 { 474 return fallback, nil 475 } 476 477 if len(args) == 1 { 478 return args[0], nil 479 } 480 481 n := strconv.Itoa(len(args)) 482 m := `expected at most 1 argument, instead of ` + n + ` args` 483 return ``, wrongToolArgs{m, ``} 484 } 485 486 func parseInteger(s string) (int, error) { 487 // it's nice to have float-like shortcuts such as `5e6`, even for ints 488 f, err := strconv.ParseFloat(s, 64) 489 if err == nil && !math.IsNaN(f) && !math.IsInf(f, 0) { 490 if float64(int64(f)) == f { 491 return int(f), 
nil 492 } 493 } 494 495 return strconv.Atoi(s) 496 } 497 498 func pickStyle(args []string, fallbackStyle string) ([]byte, error) { 499 if len(args) == 0 { 500 return styles[fallbackStyle], nil 501 } 502 503 if len(args) > 1 { 504 return nil, wrongToolArgs{`expected at most 1 argument`, ``} 505 } 506 507 name := args[0] 508 name = strings.TrimSpace(name) 509 name = strings.ToLower(name) 510 if len(name) == 0 { 511 return nil, wrongToolArgs{`no style name given`, ``} 512 } 513 514 // ignore leading dash or leading double-dash 515 name = strings.TrimPrefix(name, `-`) 516 name = strings.TrimPrefix(name, `-`) 517 518 style, ok := styles[name] 519 if !ok { 520 return nil, errors.New(`no style named ` + name) 521 } 522 return style, nil 523 } 524 525 func requireInteger(args []string) (int, error) { 526 if len(args) != 1 { 527 n := strconv.Itoa(len(args)) 528 m := `expected 1 integer-like argument, instead of ` + n + ` args` 529 return 0, wrongToolArgs{m, ``} 530 } 531 532 n, err := parseInteger(args[0]) 533 if err != nil { 534 return n, wrongToolArgs{err.Error(), ``} 535 } 536 return n, nil 537 } 538 539 func requireLeadingNumber(args []string) (float64, []string, error) { 540 if len(args) == 0 { 541 const m = `expected at least 1 number-like argument` 542 return 0, args, wrongToolArgs{m, ``} 543 } 544 545 rest := args[1:] 546 f, err := strconv.ParseFloat(args[0], 64) 547 if err != nil { 548 return f, rest, wrongToolArgs{err.Error(), ``} 549 } 550 if math.IsNaN(f) || math.IsInf(f, 0) { 551 return f, rest, wrongToolArgs{`invalid number`, ``} 552 } 553 return f, rest, nil 554 } 555 556 func requireNumber(args []string) (float64, error) { 557 if len(args) != 1 { 558 n := strconv.Itoa(len(args)) 559 m := `expected 1 number-like argument, instead of ` + n + ` args` 560 return 0, wrongToolArgs{m, ``} 561 } 562 563 f, err := strconv.ParseFloat(args[0], 64) 564 if err != nil { 565 return f, wrongToolArgs{err.Error(), ``} 566 } 567 if math.IsNaN(f) || math.IsInf(f, 0) { 568 
return f, wrongToolArgs{`invalid number`, ``} 569 } 570 return f, nil 571 } 572 573 func requireString(args []string) (string, error) { 574 if len(args) != 1 { 575 n := strconv.Itoa(len(args)) 576 m := `expected 1 argument, instead of ` + n + ` args` 577 return ``, wrongToolArgs{m, ``} 578 } 579 580 return args[0], nil 581 } File: box/main_test.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 "testing" 7 ) 8 9 type emptyReader struct{} 10 11 func (er emptyReader) Read(p []byte) (int, error) { 12 return 0, io.EOF 13 } 14 15 func TestToolAliases(t *testing.T) { 16 for name, alias := range toolAliases { 17 if _, ok := name2tool[alias]; !ok { 18 t.Fatalf(`%s: not a tool alias`, name) 19 return 20 } 21 } 22 } 23 24 func TestToolTypes(t *testing.T) { 25 bw := bufio.NewWriter(io.Discard) 26 for name := range name2tool { 27 err := dispatch(bw, emptyReader{}, []string{name}) 28 if _, bad := err.(unsupportedTool); bad { 29 t.Fatalf(`%s: unsupported tool type`, name) 30 return 31 } 32 } 33 } File: box/mimetypes.go 1 package main 2 3 import ( 4 "bytes" 5 "unicode" 6 ) 7 8 // all the MIME types used/recognized in this package 9 const ( 10 aiff = `audio/aiff` 11 au = `audio/basic` 12 avi = `video/avi` 13 avif = `image/avif` 14 bmp = `image/x-bmp` 15 caf = `audio/x-caf` 16 cur = `image/vnd.microsoft.icon` 17 css = `text/css` 18 csvMIME = `text/csv` 19 djvu = `image/x-djvu` 20 elf = `application/x-elf` 21 exe = `application/vnd.microsoft.portable-executable` 22 flac = `audio/x-flac` 23 gif = `image/gif` 24 gzMIME = `application/gzip` 25 heic = `image/heic` 26 htm = `text/html` 27 html = `text/html` 28 ico = `image/x-icon` 29 iso = `application/octet-stream` 30 jpg = `image/jpeg` 31 jpeg = `image/jpeg` 32 js = `application/javascript` 33 jsonMIME = `application/json` 34 m4a = `audio/aac` 35 m4v = `video/x-m4v` 36 mid = `audio/midi` 37 mov = `video/quicktime` 38 mp4 = `video/mp4` 39 mp3 = `audio/mpeg` 40 mpg = `video/mpeg` 41 ogg = `audio/ogg` 42 opus = `audio/opus` 43 pdf = 
`application/pdf` 44 png = `image/png` 45 ps = `application/postscript` 46 psd = `image/vnd.adobe.photoshop` 47 rtf = `application/rtf` 48 sqlite3 = `application/x-sqlite3` 49 svg = `image/svg+xml` 50 text = `text/plain` 51 tiff = `image/tiff` 52 tsvMIME = `text/tsv` 53 wasm = `application/wasm` 54 wav = `audio/x-wav` 55 webp = `image/webp` 56 webm = `video/webm` 57 xml = `application/xml` 58 zip = `application/zip` 59 zst = `application/zstd` 60 ) 61 62 var mimeAliases = map[string]string{ 63 `aif`: aiff, 64 `aiff`: aiff, 65 `au`: au, 66 `avi`: avi, 67 `avif`: avif, 68 `bmp`: bmp, 69 `caf`: caf, 70 `cur`: cur, 71 `css`: css, 72 `csv`: csvMIME, 73 `elf`: elf, 74 `exe`: exe, 75 `flac`: flac, 76 `geojson`: jsonMIME, 77 `gif`: gif, 78 `gz`: gzMIME, 79 `heic`: heic, 80 `htm`: htm, 81 `html`: html, 82 `ico`: ico, 83 `iso`: iso, 84 `jpg`: jpg, 85 `jpeg`: jpeg, 86 `js`: js, 87 `json`: jsonMIME, 88 `m4a`: m4a, 89 `m4v`: m4v, 90 `mp4`: mp4, 91 `mid`: mid, 92 `mov`: mov, 93 `mp3`: mp3, 94 `mpg`: mpg, 95 `png`: png, 96 `ogg`: ogg, 97 `opus`: opus, 98 `pdf`: pdf, 99 `plain`: text, 100 `plaintext`: text, 101 `ps`: ps, 102 `psd`: psd, 103 `rtf`: rtf, 104 `sqlite3`: sqlite3, 105 `svg`: svg, 106 `text`: text, 107 `tiff`: tiff, 108 `tsv`: tsvMIME, 109 `txt`: text, 110 `wasm`: wasm, 111 `wav`: wav, 112 `wave`: wav, 113 `webm`: webm, 114 `webp`: webp, 115 `xml`: xml, 116 `zip`: zip, 117 `zst`: zst, 118 } 119 120 // formatDescriptor ties a file-header pattern to its data-format type 121 type formatDescriptor struct { 122 Header []byte 123 Type string 124 } 125 126 // can be anything: ensure this value differs from all other literal bytes 127 // in the generic-headers table: failing that, its value could cause subtle 128 // type-misdetection bugs 129 const cba = 0xFD // 253, which is > 127, the highest-valued ascii symbol 130 131 // dash-streamed m4a format 132 var m4aDash = []byte{ 133 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 134 000, 000, 000, 000, 'i', 's', 'o', 
'6', 'm', 'p', '4', '1', 135 } 136 137 // format markers with leading wildcards, which should be checked before the 138 // normal ones: this is to prevent mismatches with the latter types, even 139 // though you can make probabilistic arguments which suggest these mismatches 140 // should be very unlikely in practice 141 var specialHeaders = []formatDescriptor{ 142 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 143 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 144 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 145 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 146 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 147 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 148 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 149 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 150 {m4aDash, m4a}, 151 } 152 153 // sqlite3 database format 154 var sqlite3db = []byte{ 155 'S', 'Q', 'L', 'i', 't', 'e', ' ', 156 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 157 000, 158 } 159 160 // windows-variant bitmap file-header, which is followed by a byte-counter for 161 // the 40-byte infoheader which follows that 162 var winbmp = []byte{ 163 'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40, 164 } 165 166 // deja-vu document format 167 var djv = []byte{ 168 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V', 169 } 170 171 var doctypeHTML = []byte{ 172 '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', '>', 173 } 174 175 // hdrDispatch groups format-description-groups by their first byte, thus 176 // shortening total lookups for some data header: notice how the `ftyp` data 177 // formats aren't handled here, since these can start with any byte, instead 178 // of the literal value of the any-byte markers they use 179 
var hdrDispatch = [256][]formatDescriptor{ 180 { 181 {[]byte{000, 000, 001, 0xBA}, mpg}, 182 {[]byte{000, 000, 001, 0xB3}, mpg}, 183 {[]byte{000, 000, 001, 000}, ico}, 184 {[]byte{000, 000, 002, 000}, cur}, 185 {[]byte{000, 'a', 's', 'm'}, wasm}, 186 }, // 0 187 nil, // 1 188 nil, // 2 189 nil, // 3 190 nil, // 4 191 nil, // 5 192 nil, // 6 193 nil, // 7 194 nil, // 8 195 nil, // 9 196 nil, // 10 197 nil, // 11 198 nil, // 12 199 nil, // 13 200 nil, // 14 201 nil, // 15 202 nil, // 16 203 nil, // 17 204 nil, // 18 205 nil, // 19 206 nil, // 20 207 nil, // 21 208 nil, // 22 209 nil, // 23 210 nil, // 24 211 nil, // 25 212 { 213 {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm}, 214 }, // 26 215 nil, // 27 216 nil, // 28 217 nil, // 29 218 nil, // 30 219 { 220 // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gzMIME}, 221 {[]byte{0x1F, 0x8B, 0x08}, gzMIME}, 222 }, // 31 223 nil, // 32 224 nil, // 33 ! 225 nil, // 34 " 226 { 227 {[]byte{'#', '!', ' '}, text}, 228 {[]byte{'#', '!', '/'}, text}, 229 }, // 35 # 230 nil, // 36 $ 231 { 232 {[]byte{'%', 'P', 'D', 'F'}, pdf}, 233 {[]byte{'%', '!', 'P', 'S'}, ps}, 234 }, // 37 % 235 nil, // 38 & 236 nil, // 39 ' 237 { 238 {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst}, 239 }, // 40 ( 240 nil, // 41 ) 241 nil, // 42 * 242 nil, // 43 + 243 nil, // 44 , 244 nil, // 45 - 245 { 246 {[]byte{'.', 's', 'n', 'd'}, au}, 247 }, // 46 . 
248 nil, // 47 / 249 nil, // 48 0 250 nil, // 49 1 251 nil, // 50 2 252 nil, // 51 3 253 nil, // 52 4 254 nil, // 53 5 255 nil, // 54 6 256 nil, // 55 7 257 { 258 {[]byte{'8', 'B', 'P', 'S'}, psd}, 259 }, // 56 8 260 nil, // 57 9 261 nil, // 58 : 262 nil, // 59 ; 263 { 264 // func checkDoc is better for these, since it's case-insensitive 265 {doctypeHTML, html}, 266 {[]byte{'<', 's', 'v', 'g'}, svg}, 267 {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html}, 268 {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html}, 269 {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html}, 270 {[]byte{'<', '?', 'x', 'm', 'l'}, xml}, 271 }, // 60 < 272 nil, // 61 = 273 nil, // 62 > 274 nil, // 63 ? 275 nil, // 64 @ 276 { 277 {djv, djvu}, 278 }, // 65 A 279 { 280 {winbmp, bmp}, 281 }, // 66 B 282 nil, // 67 C 283 nil, // 68 D 284 nil, // 69 E 285 { 286 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 287 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 288 }, // 70 F 289 { 290 {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif}, 291 {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif}, 292 }, // 71 G 293 nil, // 72 H 294 { 295 {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata 296 {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata 297 {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata 298 {[]byte{'I', 'I', '*', 000}, tiff}, 299 }, // 73 I 300 nil, // 74 J 301 nil, // 75 K 302 nil, // 76 L 303 { 304 {[]byte{'M', 'M', 000, '*'}, tiff}, 305 {[]byte{'M', 'T', 'h', 'd'}, mid}, 306 {[]byte{'M', 'Z', cba, 000, cba, 000}, exe}, 307 // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe}, 308 // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe}, 309 // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe}, 310 }, // 77 M 311 nil, // 78 N 312 { 313 {[]byte{'O', 'g', 'g', 'S'}, ogg}, 314 }, // 79 O 315 { 316 {[]byte{'P', 'K', 003, 004}, zip}, 317 }, // 80 P 318 nil, // 81 Q 319 { 320 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 321 {[]byte{'R', 'I', 'F', 'F', 
cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 322 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 323 }, // 82 R 324 { 325 {sqlite3db, sqlite3}, 326 }, // 83 S 327 nil, // 84 T 328 nil, // 85 U 329 nil, // 86 V 330 nil, // 87 W 331 nil, // 88 X 332 nil, // 89 Y 333 nil, // 90 Z 334 nil, // 91 [ 335 nil, // 92 \ 336 nil, // 93 ] 337 nil, // 94 ^ 338 nil, // 95 _ 339 nil, // 96 ` 340 nil, // 97 a 341 nil, // 98 b 342 { 343 {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 344 }, // 99 c 345 nil, // 100 d 346 nil, // 101 e 347 { 348 {[]byte{'f', 'L', 'a', 'C'}, flac}, 349 }, // 102 f 350 nil, // 103 g 351 nil, // 104 h 352 nil, // 105 i 353 nil, // 106 j 354 nil, // 107 k 355 nil, // 108 l 356 nil, // 109 m 357 nil, // 110 n 358 nil, // 111 o 359 nil, // 112 p 360 nil, // 113 q 361 nil, // 114 r 362 nil, // 115 s 363 nil, // 116 t 364 nil, // 117 u 365 nil, // 118 v 366 nil, // 119 w 367 nil, // 120 x 368 nil, // 121 y 369 nil, // 122 z 370 { 371 {[]byte{'{', '\\', 'r', 't', 'f'}, rtf}, 372 }, // 123 { 373 nil, // 124 | 374 nil, // 125 } 375 nil, // 126 376 { 377 {[]byte{127, 'E', 'L', 'F'}, elf}, 378 }, // 127 379 nil, // 128 380 nil, // 129 381 nil, // 130 382 nil, // 131 383 nil, // 132 384 nil, // 133 385 nil, // 134 386 nil, // 135 387 nil, // 136 388 { 389 {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 390 }, // 137 391 nil, // 138 392 nil, // 139 393 nil, // 140 394 nil, // 141 395 nil, // 142 396 nil, // 143 397 nil, // 144 398 nil, // 145 399 nil, // 146 400 nil, // 147 401 nil, // 148 402 nil, // 149 403 nil, // 150 404 nil, // 151 405 nil, // 152 406 nil, // 153 407 nil, // 154 408 nil, // 155 409 nil, // 156 410 nil, // 157 411 nil, // 158 412 nil, // 159 413 nil, // 160 414 nil, // 161 415 nil, // 162 416 nil, // 163 417 nil, // 164 418 nil, // 165 419 nil, // 166 420 nil, // 167 421 nil, // 168 422 nil, // 169 423 nil, // 170 424 nil, // 171 425 nil, // 172 426 nil, // 173 427 nil, // 174 428 nil, // 175 429 
nil, // 176 430 nil, // 177 431 nil, // 178 432 nil, // 179 433 nil, // 180 434 nil, // 181 435 nil, // 182 436 nil, // 183 437 nil, // 184 438 nil, // 185 439 nil, // 186 440 nil, // 187 441 nil, // 188 442 nil, // 189 443 nil, // 190 444 nil, // 191 445 nil, // 192 446 nil, // 193 447 nil, // 194 448 nil, // 195 449 nil, // 196 450 nil, // 197 451 nil, // 198 452 nil, // 199 453 nil, // 200 454 nil, // 201 455 nil, // 202 456 nil, // 203 457 nil, // 204 458 nil, // 205 459 nil, // 206 460 nil, // 207 461 nil, // 208 462 nil, // 209 463 nil, // 210 464 nil, // 211 465 nil, // 212 466 nil, // 213 467 nil, // 214 468 nil, // 215 469 nil, // 216 470 nil, // 217 471 nil, // 218 472 nil, // 219 473 nil, // 220 474 nil, // 221 475 nil, // 222 476 nil, // 223 477 nil, // 224 478 nil, // 225 479 nil, // 226 480 nil, // 227 481 nil, // 228 482 nil, // 229 483 nil, // 230 484 nil, // 231 485 nil, // 232 486 nil, // 233 487 nil, // 234 488 nil, // 235 489 nil, // 236 490 nil, // 237 491 nil, // 238 492 nil, // 239 493 nil, // 240 494 nil, // 241 495 nil, // 242 496 nil, // 243 497 nil, // 244 498 nil, // 245 499 nil, // 246 500 nil, // 247 501 nil, // 248 502 nil, // 249 503 nil, // 250 504 nil, // 251 505 nil, // 252 506 nil, // 253 507 nil, // 254 508 { 509 {[]byte{0xFF, 0xD8, 0xFF}, jpg}, 510 {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 511 {[]byte{0xFF, 0xFB}, mp3}, 512 }, // 255 513 } 514 515 // guessMIME guesses the first appropriate MIME type from the first few 516 // data bytes given: 24 bytes are enough to detect all supported types 517 func guessMIME(b []byte) (mimeType string, ok bool) { 518 // empty data, so there's no way to detect anything 519 if len(b) == 0 { 520 return ``, false 521 } 522 523 // check for plain-text web-document formats case-insensitively 524 kind, ok := checkDoc(b) 525 if ok { 526 return kind, true 527 } 528 529 // check data formats which allow any byte at the start 530 kind, ok = checkSpecial(b) 531 if ok { 532 return kind, true 533 } 534 
535 // check all other supported data formats 536 headers := hdrDispatch[b[0]] 537 for _, t := range headers { 538 if hasPrefixPattern(b[1:], t.Header[1:], cba) { 539 return t.Type, true 540 } 541 } 542 543 // unrecognized data format 544 return ``, false 545 } 546 547 // checkDoc tries to guess if the bytes given are the start of HTML, SVG, 548 // XML, or JSON data 549 func checkDoc(b []byte) (kind string, ok bool) { 550 const ( 551 json = `application/json` 552 svg = `image/svg+xml` 553 xml = `application/xml` 554 ) 555 556 // ignore leading whitespaces 557 b = bytes.TrimLeftFunc(b, unicode.IsSpace) 558 559 // can't detect anything with empty data 560 if len(b) == 0 { 561 return ``, false 562 } 563 564 // handle HTML/SVG/XML documents 565 if len(b) > 0 && b[0] == '<' { 566 if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) { 567 if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) { 568 return svg, true 569 } 570 return xml, true 571 } 572 573 headers := hdrDispatch['<'] 574 for _, v := range headers { 575 if hasPrefixFold(b, v.Header) { 576 return v.Type, true 577 } 578 } 579 return ``, false 580 } 581 582 // handle JSON with top-level arrays 583 if len(b) > 0 && b[0] == '[' { 584 // match [", or [[, or [{, ignoring spaces between 585 b = bytes.TrimLeftFunc(b[1:], unicode.IsSpace) 586 if len(b) > 0 { 587 switch b[0] { 588 case '"', '[', '{': 589 return json, true 590 } 591 } 592 return ``, false 593 } 594 595 // handle JSON with top-level objects 596 if len(b) > 0 && b[0] == '{' { 597 // match {", ignoring spaces between: after {, the only valid syntax 598 // which can follow is the opening quote for the expected object-key 599 b = bytes.TrimLeftFunc(b[1:], unicode.IsSpace) 600 if len(b) > 0 && b[0] == '"' { 601 return json, true 602 } 603 return ``, false 604 } 605 606 // checking for a quoted string, any of the JSON keywords, or even a 607 // number seems too ambiguous to declare the data valid JSON 608 609 // no web-document format detected 610 return ``, false 
611 } 612 613 // checkSpecial handles special file-format headers, which should be checked 614 // before the normal file-type headers, since the first-byte dispatch algo 615 // doesn't work for these 616 func checkSpecial(b []byte) (kind string, ok bool) { 617 if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 { 618 for _, t := range specialHeaders { 619 if hasPrefixPattern(b[4:], t.Header[4:], cba) { 620 return t.Type, true 621 } 622 } 623 } 624 return ``, false 625 } 626 627 // hasPrefixPattern works like bytes.HasPrefix, except it allows for a special 628 // value to signal any byte is allowed on specific spots 629 func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool { 630 // if the data are shorter than the pattern to match, there's no match 631 if len(what) < len(pat) { 632 return false 633 } 634 635 // use a slice which ensures the pattern length is never exceeded 636 what = what[:len(pat)] 637 638 for i, x := range what { 639 y := pat[i] 640 if x != y && y != wildcard { 641 return false 642 } 643 } 644 return true 645 } File: box/mimetypes_test.go 1 package main 2 3 import ( 4 "testing" 5 ) 6 7 func TestCheckDoc(t *testing.T) { 8 const ( 9 lf = "\n" 10 crlf = "\r\n" 11 tab = "\t" 12 xmlIntro = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` 13 ) 14 15 tests := []struct { 16 Input string 17 Expected string 18 }{ 19 {``, ``}, 20 {`{"abc":123}`, jsonMIME}, 21 {`[` + lf + ` {"abc":123}`, jsonMIME}, 22 {`[` + lf + ` {"abc":123}`, jsonMIME}, 23 {`[` + crlf + tab + `{"abc":123}`, jsonMIME}, 24 25 {``, ``}, 26 {`<?xml?>`, xml}, 27 {`<?xml?><records>`, xml}, 28 {`<?xml?>` + lf + `<records>`, xml}, 29 {`<?xml?><svg>`, svg}, 30 {`<?xml?>` + crlf + `<svg>`, svg}, 31 {xmlIntro + lf + `<svg`, svg}, 32 {xmlIntro + crlf + `<svg`, svg}, 33 } 34 35 for _, tc := range tests { 36 t.Run(tc.Input, func(t *testing.T) { 37 res, _ := checkDoc([]byte(tc.Input)) 38 if res != tc.Expected { 39 t.Fatalf(`got %v, expected %v instead`, res, 
tc.Expected) 40 } 41 }) 42 } 43 } 44 45 func TestHasPrefixPattern(t *testing.T) { 46 var ( 47 data = []byte{ 48 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 49 } 50 pat = []byte{ 51 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 52 } 53 ) 54 55 if !hasPrefixPattern(data, pat, cba) { 56 t.Fatal(`wildcard pattern not working`) 57 } 58 } File: box/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2024 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. 
File: box/nn.go 1 package main 2 3 import ( 4 "bytes" 5 "io" 6 ) 7 8 // nn is the `nice numbers` tool, which alternates ANSI-styles over long 9 // sequences of ASCII digits, making them easier for people to read/parse 10 func nn(w writer, r io.Reader, args []string) error { 11 style, err := pickStyle(args, `gray`) 12 if err != nil { 13 return err 14 } 15 16 if bytes.Equal(style, []byte("\x1b[0m")) { 17 return loopLines(r, func(i int, line []byte) error { 18 return lines(w, i, line) 19 }) 20 } 21 22 return loopLines(r, func(i int, line []byte) error { 23 restyleLine(w, line, style) 24 return endLine(w) 25 }) 26 } 27 28 // restyleLine is the testable core of func niceNumbers 29 func restyleLine(w writer, line []byte, style []byte) { 30 for len(line) > 0 { 31 i := indexDigit(line) 32 if i < 0 { 33 // no (more) digits to style for sure 34 w.Write(line) 35 return 36 } 37 38 // some ANSI-style sequences use 4-digit numbers, which are long 39 // enough for this app to mangle 40 isANSI := i >= 2 && line[i-2] == '\x1b' && line[i-1] == '[' 41 42 // emit line before current digit-run 43 w.Write(line[:i]) 44 // advance to the start of the current digit-run 45 line = line[i:] 46 47 // see where the digit-run ends 48 j := indexNonDigit(line) 49 if j < 0 { 50 // the digit-run goes until the end 51 if !isANSI { 52 restyleDigits(w, line, style) 53 } else { 54 w.Write(line) 55 } 56 return 57 } 58 59 // emit styled digit-run... 
maybe 60 if !isANSI { 61 restyleDigits(w, line[:j], style) 62 } else { 63 w.Write(line[:j]) 64 } 65 66 // skip right past the end of the digit-run 67 line = line[j:] 68 } 69 } 70 71 // restyleDigits renders a run of digits as alternating styled/unstyled runs 72 // of 3 digits, which greatly improves readability, and is the only purpose 73 // of this app; string is assumed to be all decimal digits 74 func restyleDigits(w writer, digits []byte, altStyle []byte) { 75 if len(digits) < 4 { 76 // digit sequence is short, so emit it as is 77 w.Write(digits) 78 return 79 } 80 81 // separate leading 0..2 digits which don't align with the 3-digit groups 82 i := len(digits) % 3 83 // emit leading digits unstyled, if there are any 84 w.Write(digits[:i]) 85 // the rest is guaranteed to have a length which is a multiple of 3 86 digits = digits[i:] 87 88 // start by styling, unless there were no leading digits 89 style := i != 0 90 91 for len(digits) > 0 { 92 if style { 93 w.Write(altStyle) 94 w.Write(digits[:3]) 95 w.Write([]byte{'\x1b', '[', '0', 'm'}) 96 } else { 97 w.Write(digits[:3]) 98 } 99 100 // advance to the next triple: the start of this func is supposed 101 // to guarantee this step always works 102 digits = digits[3:] 103 104 // alternate between styled and unstyled 3-digit groups 105 style = !style 106 } 107 } File: box/overview.txt 1 This app is an all-in-one mix of command-line tools, some of which are 2 (arguably) useful, some which are more experimental. 3 4 Run the app by giving it a tool name (you can try `help` or `tools`) as its 5 first argument, to run any specific tool. 
6 7 Among the main motivations behind such a `busybox/toybox` approach are: 8 - keeping several small tools in one place 9 - minimizing the number of my own custom little apps hanging around 10 - saving a few MBs on file-size, as go(lang) makes fairly-big apps 11 - having a few simply-named tools do reasonably-common CLI tasks 12 13 The app basically dispatches func calls out of a lookup-table: all funcs 14 have byte-based IO, some funcs need and/or accept arguments. 15 16 The mere idea of file/folder names is practically absent in this app: the 17 only tools which use FS-type names are the file-lister tools; no other 18 tools even allow mentioning files/folders by name, relying exclusively on 19 piped data streams. 20 21 Some files are just slightly-adapted copies of other standalone tools I 22 made previously: some of their own unit-tests weren't copied to this app. 23 24 Of particular interest is the concurrent/async `channelling` used in the 25 `compose` tool: it's most likely the trickiest code in this app, and seems 26 to work as is. Using io.Pipe from the go(lang) stdlib seems astonishingly 27 wasteful/inefficient, as most of its writes are literally empty slices, 28 when used concurrently as part of internal comms across `composed` tools: 29 its wasteful behavior is apparently deliberate, according to discussions 30 I read which involved official go(lang) maintainers. 31 32 A nice abstraction which seems to work particularly well is the so-called 33 `lineFlusher` type, a simple wrapper/embedder of a *bufio.Writer which 34 flushes itself any time line-feeds are written using it. This type is used 35 seamlessly across this app, and seems to provide a very good trade-off 36 between efficiency (rarity of writing/flushing) and `liveness` of output, 37 especially when passing data thru `composed` internal pipes.
38 39 Custom error types deal with end-of-output (type `noMoreOutput`; presumably 40 due to closed stdout pipes), multi-error failures (type `multipleErrors`) 41 which need no further reporting, or even enable automated unit-testing of 42 lookup tables, such as type `unsupportedTool`. 43 44 Tools related to the last lines from the input, whether it's keeping or 45 ignoring them, make cute uses of ring-buffers. 46 47 I'm considering adding my other cmd-line app `coby` (COunt BYtes) to this 48 bunch of tools. This app uses channels in interesting ways to limit/bound 49 concurrent steps in 2 separate ways: one is to upper-limit concurrency on the 50 data-processing side (counting byte-related stats), the other is to keep the 51 order implied by the cmd-line args given when presenting final results, while 52 still presenting any result as soon as it's available. 53 54 I like the idea of a `deflac` tool, but it may require too much time to make 55 that one work. File: box/sbs.go 1 package main 2 3 import ( 4 "bytes" 5 "io" 6 "math" 7 "strings" 8 "unicode" 9 "unicode/utf8" 10 ) 11 12 // sbs is the `side by side` tool 13 func sbs(w writer, r io.Reader, args []string) error { 14 ncols, err := optionalInteger(args, 0) 15 if err != nil { 16 return err 17 } 18 19 var buf []byte 20 var lines [][]byte 21 loopLines(r, func(i int, line []byte) error { 22 buf = buf[:0] 23 line = bytes.TrimRightFunc(line, unicode.IsSpace) 24 buf = expandTabs(buf, line, defaultTabstop) 25 lines = append(lines, append([]byte{}, buf...)) 26 return nil 27 }) 28 29 if ncols < 1 { 30 ncols = chooseNumColumns(lines) 31 } 32 33 cols, maxHeight := splitLines(lines, ncols) 34 widths := make([]int, 0, len(cols)) 35 for _, c := range cols { 36 widths = append(widths, findMaxWidth(c)) 37 } 38 39 endColumnSeparator := strings.TrimRight(columnSeparator, ` `) 40 41 // show columns side by side 42 for r := 0; r < maxHeight; r++ { 43 for c := 0; c < len(cols); c++ { 44 badr := r >= len(cols[c]) 45 46 // clearly 
separate columns visually 47 if c > 0 { 48 if c == len(cols)-1 && (badr || cols[c][r] == nil) { 49 // avoid unneeded trailing spaces 50 w.WriteString(endColumnSeparator) 51 } else { 52 w.WriteString(columnSeparator) 53 } 54 } 55 56 if badr { 57 // exceeding items for this (last) column 58 continue 59 } 60 61 // pad all columns, except the last 62 width := 0 63 if c < len(cols)-1 { 64 width = widths[c] 65 } 66 67 // emit maybe-padded column 68 w.Write(cols[c][r]) 69 writeSpaces(w, width-findWidth(cols[c][r])) 70 } 71 72 // end the line 73 err := w.WriteByte('\n') 74 if err != nil { 75 // probably a pipe was closed 76 return nil 77 } 78 } 79 80 return nil 81 } 82 83 // chooseNumColumns implements heuristics to auto-pick the number of columns 84 // to show: this func is used when the app is using data from standard-input 85 func chooseNumColumns(lines [][]byte) int { 86 if len(lines) == 0 { 87 return 1 88 } 89 90 // sepw is the separator width 91 sepw := utf8.RuneCountInString(columnSeparator) 92 93 // see if lines can even fit a single column 94 if !columnsCanFit(1, lines, sepw) { 95 return 1 96 } 97 98 // starting from the max possible columns which may fit, keep trying 99 // with 1 fewer column, until the columns fit 100 for ncols := int(maxAutoWidth / sepw); ncols > 1; ncols-- { 101 if columnsCanFit(ncols, lines, sepw) { 102 // success: found the most columns which fit 103 return ncols 104 } 105 } 106 107 // avoid multiple columns if some lines are too wide 108 return 1 109 } 110 111 // columnsCanFit checks whether the number of columns given would fit the 112 // display max-width constant 113 func columnsCanFit(ncols int, lines [][]byte, gap int) bool { 114 if ncols < 1 { 115 // avoid surprises when called with non-sense column counts 116 return true 117 } 118 119 // stack-allocate the backing-array behind slice maxw 120 var buf [maxAutoWidth / 2]int 121 maxw := buf[:0] 122 123 // find the column max-height, to chunk lines into columns 124 h := 
int(math.Ceil(float64(len(lines)) / float64(ncols))) 125 126 // find column max-width by looping over chunks of lines 127 for len(lines) >= h { 128 w := findMaxWidth(lines[:h]) 129 maxw = append(maxw, w) 130 lines = lines[h:] 131 } 132 133 // don't forget the last column 134 if len(lines) > 0 { 135 w := findMaxWidth(lines) 136 maxw = append(maxw, w) 137 } 138 139 // remember to add the gaps/separators between columns, along with 140 // all the individual column max-widths 141 w := (ncols - 1) * gap 142 for _, n := range maxw { 143 w += n 144 } 145 146 // do the columns fit? 147 return w <= maxAutoWidth 148 } 149 150 // splitLines turns an array of lines into sub-arrays of lines, so they can 151 // be shown side by side later on 152 func splitLines(lines [][]byte, ncols int) (cols [][][]byte, maxheight int) { 153 n := ncols 154 hfrac := float64(len(lines)) / float64(n) 155 h := int(math.Ceil(hfrac)) 156 157 cols = make([][][]byte, 0, n) 158 for len(lines) > h { 159 cols = append(cols, lines[:h]) 160 lines = lines[h:] 161 } 162 if len(lines) != 0 { 163 cols = append(cols, lines) 164 } 165 return cols, h 166 } File: box/strings.go 1 package main 2 3 import ( 4 "bytes" 5 "unicode/utf8" 6 ) 7 8 // isSymbolASCII helps the `strings` tool do its job quickly 9 var isSymbolASCII = [256]bool{ 10 false, false, false, false, false, false, false, false, 11 false, true, false, false, false, false, false, false, 12 false, false, false, false, false, false, false, false, 13 false, false, false, false, false, false, false, false, 14 true, true, true, true, true, true, true, true, 15 true, true, true, true, true, true, true, true, 16 true, true, true, true, true, true, true, true, 17 true, true, true, true, true, true, true, true, 18 true, true, true, true, true, true, true, true, 19 true, true, true, true, true, true, true, true, 20 true, true, true, true, true, true, true, true, 21 true, true, true, true, true, true, true, true, 22 true, true, true, true, true, true, true, true, 23 
true, true, true, true, true, true, true, true, 24 true, true, true, true, true, true, true, true, 25 true, true, true, true, true, true, true, false, 26 } 27 28 // styles turns style-names into the ANSI-code sequences used by some tools 29 var styles = map[string][]byte{ 30 `b`: []byte("\x1b[38;5;26m"), // same as `blue` 31 `g`: []byte("\x1b[38;5;29m"), // same as `green` 32 `h`: []byte("\x1b[7m"), // same as `highlighted` 33 `o`: []byte("\x1b[38;5;166m"), // same as `orange` 34 `p`: []byte("\x1b[38;5;99m"), // same as `purple` 35 `r`: []byte("\x1b[31m"), // same as `red` 36 `u`: []byte("\x1b[4m"), // same as `underline` 37 38 `blue`: []byte("\x1b[38;5;26m"), 39 `bold`: []byte("\x1b[1m"), 40 `gray`: []byte("\x1b[38;5;248m"), 41 `green`: []byte("\x1b[38;5;29m"), 42 `highlight`: []byte("\x1b[7m"), 43 `highlighted`: []byte("\x1b[7m"), 44 `hilite`: []byte("\x1b[7m"), 45 `hilited`: []byte("\x1b[7m"), 46 `inverse`: []byte("\x1b[7m"), 47 `inverted`: []byte("\x1b[7m"), 48 `orange`: []byte("\x1b[38;5;166m"), 49 `plain`: []byte("\x1b[0m"), 50 `purple`: []byte("\x1b[38;5;99m"), 51 `red`: []byte("\x1b[31m"), 52 `underline`: []byte("\x1b[4m"), 53 `underlined`: []byte("\x1b[4m"), 54 55 `blueback`: []byte("\x1b[48;5;26m\x1b[38;5;15m"), 56 `bluebg`: []byte("\x1b[48;5;26m\x1b[38;5;15m"), 57 `grayback`: []byte("\x1b[48;5;248m\x1b[38;5;15m"), 58 `graybg`: []byte("\x1b[48;5;248m\x1b[38;5;15m"), 59 `greenback`: []byte("\x1b[48;5;29m\x1b[38;5;15m"), 60 `greenbg`: []byte("\x1b[48;5;29m\x1b[38;5;15m"), 61 `redback`: []byte("\x1b[41m\x1b[38;5;15m"), 62 `redbg`: []byte("\x1b[41m\x1b[38;5;15m"), 63 `orangeback`: []byte("\x1b[48;5;166m\x1b[38;5;15m"), 64 `orangebg`: []byte("\x1b[48;5;166m\x1b[38;5;15m"), 65 `purpleback`: []byte("\x1b[48;5;99m\x1b[38;5;15m"), 66 `purplebg`: []byte("\x1b[48;5;99m\x1b[38;5;15m"), 67 } 68 69 // uriUnescapedASCII marks which ASCII bytes don't need escaping 70 var uriUnescapedASCII = [256]bool{ 71 '0': true, '1': true, '2': true, '3': true, '4': true, 72 '5': 
true, '6': true, '7': true, '8': true, '9': true, 73 74 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 75 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 76 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 77 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 78 'Y': true, 'Z': true, 79 80 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 81 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 82 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 83 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 84 'y': true, 'z': true, 85 86 '-': true, '_': true, '.': true, '~': true, 87 '/': true, 88 } 89 90 func advanceSlice(items [][]byte, n int) [][]byte { 91 if n <= len(items) { 92 return items[n:] 93 } 94 return nil 95 } 96 97 func indexSlice(items [][]byte, i int) []byte { 98 if i < len(items) { 99 return items[i] 100 } 101 return nil 102 } 103 104 func limitSlice(items [][]byte, n int) [][]byte { 105 if n <= len(items) { 106 return items[:n] 107 } 108 return items 109 } 110 111 func expandTabs(dest []byte, src []byte, tabstop int) []byte { 112 if tabstop < 1 { 113 return append(dest, src...) 
114 } 115 116 n := 0 117 118 for len(src) > 0 { 119 r, size := utf8.DecodeRune(src) 120 src = src[size:] 121 122 if r != '\t' { 123 dest = utf8.AppendRune(dest, r) 124 n++ 125 continue 126 } 127 128 spaces := tabstop - n%tabstop 129 n += spaces 130 131 for j := 0; j < spaces; j++ { 132 dest = append(dest, ' ') 133 } 134 } 135 136 return dest 137 } 138 139 func indexAny(s []byte, t [][]byte) (start int, end int) { 140 start = -1 141 end = -1 142 143 for _, sep := range t { 144 i := bytes.Index(s, sep) 145 if i < 0 { 146 continue 147 } 148 149 if start > i || start < 0 { 150 start = i 151 end = start + len(sep) 152 } 153 } 154 155 return start, end 156 } 157 158 // splitSliceNonEmpty does what it says, ensuring no subslice in the result 159 // is empty; empty slices return empty results 160 func splitSliceNonEmpty(items []string, sep string) [][]string { 161 cur := items 162 var res [][]string 163 164 for len(cur) > 0 { 165 // skip all leading separators, also ensuring no empty subslices 166 // sneak thru the splitting happending below 167 for len(cur) > 0 && cur[0] == sep { 168 cur = cur[1:] 169 } 170 171 i := findNext(cur, sep) 172 // no more subslices, or the very last subslice follows 173 if i < 0 { 174 // don't forget trailing subslices, after the last separator 175 if len(cur) > 0 { 176 res = append(res, cur) 177 } 178 return res 179 } 180 181 // ignore empty subslices 182 if i == 0 { 183 continue 184 } 185 186 res = append(res, cur[:i]) 187 cur = cur[i+1:] 188 } 189 190 return res 191 } 192 193 // findNext finds a string in a string-slice, returning an invalid negative 194 // index on failure 195 func findNext(src []string, what string) int { 196 for i, s := range src { 197 if s == what { 198 return i 199 } 200 } 201 return -1 202 } 203 204 func findWidth(s []byte) int { 205 w := 0 206 loopPlain(s, func(i int, s []byte) error { 207 w += utf8.RuneCount(s) 208 return nil 209 }) 210 return w 211 } 212 213 func findMaxWidth(items [][]byte) int { 214 maxw := 0 215 
for _, s := range items { 216 if w := findWidth(s); maxw < w { 217 maxw = w 218 } 219 } 220 return maxw 221 } 222 223 // hasPrefixFold is a case-insensitive bytes.HasPrefix 224 func hasPrefixFold(s []byte, prefix []byte) bool { 225 n := len(prefix) 226 return len(s) >= n && bytes.EqualFold(s[:n], prefix) 227 } 228 229 // indexDigit finds the index of the first digit in a string, or -1 when the 230 // string has no decimal digits 231 func indexDigit(s []byte) int { 232 for i := 0; i < len(s); i++ { 233 switch s[i] { 234 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 235 return i 236 } 237 } 238 239 // empty slice, or a slice without any digits 240 return -1 241 } 242 243 // indexNonDigit finds the index of the first non-digit in a string, or -1 244 // when the string is all decimal digits 245 func indexNonDigit(s []byte) int { 246 for i := 0; i < len(s); i++ { 247 switch s[i] { 248 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 249 continue 250 default: 251 return i 252 } 253 } 254 255 // empty slice, or a slice which only has digits 256 return -1 257 } File: box/strings_test.go 1 package main 2 3 import ( 4 "bufio" 5 "strings" 6 "testing" 7 ) 8 9 func TestRestyleLine(t *testing.T) { 10 var ( 11 r = "\x1b[0m" 12 d = string(styles[`gray`]) 13 ) 14 15 var tests = []struct { 16 Input string 17 Expected string 18 }{ 19 {``, ``}, 20 {`abc`, `abc`}, 21 {` abc 123456 `, ` abc 123` + d + `456` + r + ` `}, 22 {` 123456789 text`, ` 123` + d + `456` + r + `789 text`}, 23 24 {`0`, `0`}, 25 {`01`, `01`}, 26 {`012`, `012`}, 27 {`0123`, `0` + d + `123` + r}, 28 {`01234`, `01` + d + `234` + r}, 29 {`012345`, `012` + d + `345` + r}, 30 {`0123456`, `0` + d + `123` + r + `456`}, 31 {`01234567`, `01` + d + `234` + r + `567`}, 32 {`012345678`, `012` + d + `345` + r + `678`}, 33 {`0123456789`, `0` + d + `123` + r + `456` + d + `789` + r}, 34 {`01234567890`, `01` + d + `234` + r + `567` + d + `890` + r}, 35 {`012345678901`, `012` + d + `345` + r + `678` + d + `901` + r}, 
36 {`0123456789012`, `0` + d + `123` + r + `456` + d + `789` + r + `012`}, 37 38 {`00321`, `00` + d + `321` + r}, 39 {`123.456789`, `123.` + `456` + d + `789` + r}, 40 {`123456.123456`, `123` + d + `456` + r + `.` + `123` + d + `456` + r}, 41 } 42 43 for _, tc := range tests { 44 t.Run(tc.Input, func(t *testing.T) { 45 var b strings.Builder 46 w := bufio.NewWriter(&b) 47 restyleLine(lineFlusher{w}, []byte(tc.Input), []byte(d)) 48 // don't forget to flush the buffer, or output may stay empty 49 w.Flush() 50 51 if got := b.String(); got != tc.Expected { 52 t.Fatalf(`expected %q, but got %q instead`, tc.Expected, got) 53 } 54 }) 55 } 56 } 57 58 func TestExpand(t *testing.T) { 59 var tests = []struct { 60 name string 61 input string 62 tabstop int 63 expected string 64 }{ 65 {`empty`, ``, 4, ``}, 66 {`indent 1`, "\tabc", 4, ` abc`}, 67 {`indent 2`, "\t\tabc", 4, ` abc`}, 68 {`indent 2 (mix tab/space)`, "\t \tabc", 4, ` abc`}, 69 } 70 71 for _, tc := range tests { 72 t.Run(tc.name, func(t *testing.T) { 73 expanded := expandTabs(nil, []byte(tc.input), tc.tabstop) 74 75 if got := string(expanded); got != tc.expected { 76 const fs = `input %q, tabstop %d: got %q, instead of %q` 77 t.Fatalf(fs, tc.input, tc.tabstop, got, tc.expected) 78 } 79 }) 80 } 81 } 82 83 func TestFindWidth(t *testing.T) { 84 var tests = []struct { 85 name string 86 input string 87 expected int 88 }{ 89 { 90 name: `empty`, 91 input: ``, 92 expected: 0, 93 }, 94 { 95 name: `no ansi escapes`, 96 input: `abc def`, 97 expected: 7, 98 }, 99 { 100 name: `simple ansi escapes`, 101 input: "\x1b[38;5;120mabc def\x1b[0m", 102 expected: 7, 103 }, 104 } 105 106 for _, tc := range tests { 107 t.Run(tc.name, func(t *testing.T) { 108 if got := findWidth([]byte(tc.input)); got != tc.expected { 109 const fs = `input %q: got %d, instead of %d` 110 t.Fatalf(fs, tc.input, got, tc.expected) 111 } 112 }) 113 } 114 } File: box/symbols.go 1 package main 2 3 import ( 4 "errors" 5 "io" 6 "sort" 7 "strings" 8 ) 9 10 var 
name2symbol = map[string]string{ 11 `adash`: `-`, 12 `aeq`: `≈`, 13 `almost`: `≈`, 14 `amp`: `&`, 15 `ampersand`: `&`, 16 `apos`: `’`, 17 `apostrophe`: `’`, 18 `approx`: `≅`, 19 `asterisk`: `*`, 20 `atleast`: `≥`, 21 `atmost`: `≤`, 22 `backquote`: "`", 23 `backslash`: `\`, 24 `backtick`: "`", 25 `ball`: `●`, 26 `bang`: `!`, 27 `base64`: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=`, 28 `block`: `█`, 29 `bquo`: "`", 30 `bquote`: "`", 31 `bslash`: `\`, 32 `bullet`: `•`, 33 `caret`: `^`, 34 `cdot`: `·`, 35 `circle`: `●`, 36 `cloud`: `☁️`, 37 `colon`: `:`, 38 `comma`: `,`, 39 `copyright`: `©`, 40 `cquote`: `”`, 41 `crap`: `💩`, 42 `crapface`: `💩`, 43 `cross`: `×`, 44 `dash`: `—`, 45 `deg`: `°`, 46 `degree`: `°`, 47 `doc`: `📄`, 48 `document`: `📄`, 49 `dollar`: `$`, 50 `dot`: `.`, 51 `dquo`: `"`, 52 `dquote`: `"`, 53 `ellip`: `…`, 54 `ellipsis`: `…`, 55 `email`: `@`, 56 `eq`: `=`, 57 `equal`: `=`, 58 `equals`: `=`, 59 `excl`: `!`, 60 `exclam`: `!`, 61 `exclamation`: `!`, 62 `faces`: `😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣😤😥😦😧😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷🙁🙂🙃🙄🧐👶🤓🤐🤑🤒🤔🤕🤗🤠🤡🤢🤣🤤🤥🤧🤨🤩🤪🤫🤬🤭🤮🤯`, 63 `file`: `📄`, 64 `fullmoon`: `🌕`, 65 `fullstop`: `.`, 66 `geq`: `≥`, 67 `greatereq`: `≥`, 68 `happy`: `😀`, 69 `happyface`: `😀`, 70 `heart`: `❤️`, 71 `hellip`: `…`, 72 `hole`: `○`, 73 `hyphen`: `-`, 74 `leq`: `≤`, 75 `less`: `<`, 76 `lesseq`: `≤`, 77 `lightning`: `🌩️`, 78 `mdash`: `—`, 79 `mdot`: `·`, 80 `more`: `>`, 81 `music`: `🎵`, 82 `musicalnote`: `🎵`, 83 `ndash`: `–`, 84 `neq`: `≠`, 85 `not`: `¬`, 86 `notequal`: `≠`, 87 `notequals`: `≠`, 88 `oquote`: `“`, 89 `period`: `.`, 90 `pipe`: `|`, 91 `question`: `?`, 92 `rain`: `🌧️`, 93 `semicolon`: `;`, 94 `sharp`: `#`, 95 `shit`: `💩`, 96 `shitface`: `💩`, 97 `slash`: `/`, 98 `slasher`: `⧸`, 99 `slashier`: `⧸`, 100 `smile`: `🙂`, 101 `smileface`: `🙂`, 102 `smilingface`: `🙂`, 103 `snow`: `❄️`, 104 `square`: `■`, 105 `squo`: `'`, 106 `squote`: `'`, 107 `star`: `⭐`, 108 `sun`: `☀️`, 109 `tilde`: `~`, 110 `vbar`: `|`, 111 `vellip`: `⋮`, 112 
`alpha`: `α`, 113 `beta`: `β`, 114 `delta`: `δ`, 115 `eps`: `ε`, 116 `epsilon`: `ε`, 117 `gamma`: `γ`, 118 `lambda`: `λ`, 119 `omega`: `ω`, 120 `pi`: `π`, 121 `sigma`: `σ`, 122 `tau`: `τ`, 123 `theta`: `θ`, 124 `Alpha`: `Α`, 125 `Beta`: `Β`, 126 `Delta`: `Δ`, 127 `Omega`: `Ω`, 128 `Pi`: `Π`, 129 `Sigma`: `Σ`, 130 `Theta`: `Θ`, 131 132 `alphabet`: `abcdefghijklmnopqrstuvwxyz`, 133 `asciiletters`: `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`, 134 `digits`: `0123456789`, 135 `greek`: `αβγδεζηθικλμνξοπρστυφχψω`, 136 `hex`: `0123456789abcdef`, 137 `hexa`: `0123456789abcdef`, 138 `hexadec`: `0123456789abcdef`, 139 `hexadecimal`: `0123456789abcdef`, 140 `hexdigits`: `0123456789abcdefABCDEF`, 141 `inf`: `∞`, 142 `infinity`: `∞`, 143 `latin`: `abcdefghijklmnopqrstuvwxyz`, 144 `letters`: `abcdefghijklmnopqrstuvwxyz`, 145 `lower`: `abcdefghijklmnopqrstuvwxyz`, 146 `lowercase`: `abcdefghijklmnopqrstuvwxyz`, 147 `lowercasegreek`: `αβγδεζηθικλμνξοπρστυφχψω`, 148 `lowergreek`: `αβγδεζηθικλμνξοπρστυφχψω`, 149 `lowercasehex`: `0123456789abcdef`, 150 `lowerhex`: `0123456789abcdef`, 151 `math`: `+-×÷²³±`, 152 `midascii`: "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", 153 `octal`: `01234567`, 154 `octaldigits`: `01234567`, 155 `octdigits`: `01234567`, 156 `other`: `✓✗✔❌`, 157 `plusminus`: `±`, 158 `prod`: `Π`, 159 `product`: `Π`, 160 `punct`: "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 161 `punctuation`: "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", 162 `sum`: `Σ`, 163 `summation`: `Σ`, 164 `upper`: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 165 `uppercase`: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 166 `uppercasegreek`: `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ`, 167 `uppergreek`: `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ`, 168 `uppercasehex`: `0123456789ABCDEF`, 169 `upperhex`: `0123456789ABCDEF`, 170 171 `aud`: `A$`, 172 `brl`: `R$`, 173 `cad`: `C$`, 174 `chf`: `CHF`, 175 `clp`: `CLP`, 176 `cny`: `元`, 177 `czk`: `Kč`, 178 `dkk`: `DKK`, 179 `eur`: `€`, 180 `gbp`: `£`, 181 `hkd`: `HK$`, 182 
`huf`: `Ft`, 183 `idr`: `Rp`, 184 `ils`: `₪`, 185 `inr`: `₹`, 186 `jpy`: `¥`, 187 `krw`: `₩`, 188 `mxn`: `MXN`, 189 `nok`: `NOK`, 190 `nzd`: `NZ$`, 191 `php`: `₱`, 192 `pln`: `zł`, 193 `rub`: `₽`, 194 `sar`: `﷼`, 195 `sek`: `SEK`, 196 `sgd`: `S$`, 197 `thb`: `฿`, 198 `try`: `₺`, 199 `twd`: `NT$`, 200 `usd`: `$`, 201 `zar`: `R`, 202 `baht`: `฿`, 203 `britishpound`: `£`, 204 `cent`: `¢`, 205 `cents`: `¢`, 206 `euro`: `€`, 207 `indianrupee`: `₹`, 208 `koruna`: `Kč`, 209 `naira`: `₦`, 210 `newshekel`: `₪`, 211 `philippinepeso`: `₱`, 212 `pound`: `£`, 213 `poundsterling`: `£`, 214 `renminbi`: `元`, 215 `riyal`: `﷼`, 216 `ruble`: `₽`, 217 `rupee`: `₹`, 218 `saudiriyal`: `﷼`, 219 `shekel`: `₪`, 220 `sterling`: `£`, 221 `turkishlira`: `₺`, 222 `won`: `₩`, 223 `yen`: `¥`, 224 `zloty`: `zł`, 225 `afghanistan`: `🇦🇫`, 226 `america`: `🇺🇸`, 227 `algeria`: `🇩🇿`, 228 `angola`: `🇦🇴`, 229 `arabia`: `🇸🇦`, 230 `arabemirates`: `🇦🇪`, 231 `argentina`: `🇦🇷`, 232 `australia`: `🇦🇺`, 233 `austria`: `🇦🇹`, 234 `bangladesh`: `🇧🇩`, 235 `belgium`: `🇧🇪`, 236 `brazil`: `🇧🇷`, 237 `britain`: `🇬🇧`, 238 `canada`: `🇨🇦`, 239 `chile`: `🇨🇱`, 240 `china`: `🇨🇳`, 241 `colombia`: `🇨🇴`, 242 `czechia`: `🇨🇿`, 243 `czechrepublic`: `🇨🇿`, 244 `denmark`: `🇩🇰`, 245 `dominicanrepublic`: `🇩🇴`, 246 `drc`: `🇨🇩`, 247 `drcongo`: `🇨🇩`, 248 `ecuador`: `🇪🇨`, 249 `egypt`: `🇪🇬`, 250 `emirates`: `🇦🇪`, 251 `england`: `🇬🇧`, 252 `ethiopia`: `🇪🇹`, 253 `europe`: `🇪🇺`, 254 `europeanunion`: `🇪🇺`, 255 `finland`: `🇫🇮`, 256 `france`: `🇫🇷`, 257 `germany`: `🇩🇪`, 258 `ghana`: `🇬🇭`, 259 `greatbritain`: `🇬🇧`, 260 `greece`: `🇬🇷`, 261 `holland`: `🇳🇱`, 262 `hungary`: `🇭🇺`, 263 `india`: `🇮🇳`, 264 `indonesia`: `🇮🇩`, 265 `iran`: `🇮🇷`, 266 `iraq`: `🇮🇶`, 267 `ireland`: `🇮🇪`, 268 `israel`: `🇮🇱`, 269 `italy`: `🇮🇹`, 270 `japan`: `🇯🇵`, 271 `kazakhstan`: `🇰🇿`, 272 `kenya`: `🇰🇪`, 273 `korea`: `🇰🇷`, 274 `kuwait`: `🇰🇼`, 275 `madagascar`: `🇲🇬`, 276 `malaysia`: `🇲🇾`, 277 `mexico`: `🇲🇽`, 278 `morocco`: `🇲🇦`, 279 `mozambique`: `🇲🇿`, 280 `myanmar`: `🇲🇲`, 281 
`nepal`: `🇳🇵`, 282 `netherlands`: `🇳🇱`, 283 `newzealand`: `🇳🇿`, 284 `nigeria`: `🇳🇬`, 285 `northkorea`: `🇰🇵`, 286 `norway`: `🇳🇴`, 287 `pakistan`: `🇵🇰`, 288 `peru`: `🇵🇪`, 289 `philippines`: `🇵🇭`, 290 `poland`: `🇵🇱`, 291 `portugal`: `🇵🇹`, 292 `qatar`: `🇶🇦`, 293 `rok`: `🇰🇷`, 294 `romania`: `🇷🇴`, 295 `russia`: `🇷🇺`, 296 `saudiarabia`: `🇸🇦`, 297 `singapore`: `🇸🇬`, 298 `somalia`: `🇸🇴`, 299 `southafrica`: `🇿🇦`, 300 `southkorea`: `🇰🇷`, 301 `spain`: `🇪🇸`, 302 `srilanka`: `🇱🇰`, 303 `sudan`: `🇸🇩`, 304 `sweden`: `🇸🇪`, 305 `switzerland`: `🇨🇭`, 306 `taiwan`: `🇹🇼`, 307 `tanzania`: `🇹🇿`, 308 `thailand`: `🇹🇭`, 309 `turkey`: `🇹🇷`, 310 `uganda`: `🇺🇬`, 311 `ukraine`: `🇺🇦`, 312 `unitedarab`: `🇦🇪`, 313 `unitedarabemirates`: `🇦🇪`, 314 `unitedemirates`: `🇦🇪`, 315 `unitedkingdom`: `🇬🇧`, 316 `unitedstates`: `🇺🇸`, 317 `uzbekistan`: `🇺🇿`, 318 `yemen`: `🇾🇪`, 319 `venezuela`: `🇻🇪`, 320 `vietnam`: `🇻🇳`, 321 `af`: `🇦🇫`, 322 `ax`: `🇦🇽`, 323 `al`: `🇦🇱`, 324 `dz`: `🇩🇿`, 325 `as`: `🇦🇸`, 326 `ad`: `🇦🇩`, 327 `ao`: `🇦🇴`, 328 `ai`: `🇦🇮`, 329 `aq`: `🇦🇶`, 330 `ag`: `🇦🇬`, 331 `ar`: `🇦🇷`, 332 `am`: `🇦🇲`, 333 `aw`: `🇦🇼`, 334 `au`: `🇦🇺`, 335 `at`: `🇦🇹`, 336 `az`: `🇦🇿`, 337 `bs`: `🇧🇸`, 338 `bh`: `🇧🇭`, 339 `bd`: `🇧🇩`, 340 `bb`: `🇧🇧`, 341 `by`: `🇧🇾`, 342 `be`: `🇧🇪`, 343 `bz`: `🇧🇿`, 344 `bj`: `🇧🇯`, 345 `bm`: `🇧🇲`, 346 `bt`: `🇧🇹`, 347 `bo`: `🇧🇴`, 348 `bq`: `🇧🇶`, 349 `ba`: `🇧🇦`, 350 `bw`: `🇧🇼`, 351 `bv`: `🇧🇻`, 352 `br`: `🇧🇷`, 353 `io`: `🇮🇴`, 354 `bn`: `🇧🇳`, 355 `bg`: `🇧🇬`, 356 `bf`: `🇧🇫`, 357 `bi`: `🇧🇮`, 358 `cv`: `🇨🇻`, 359 `kh`: `🇰🇭`, 360 `cm`: `🇨🇲`, 361 `ca`: `🇨🇦`, 362 `ky`: `🇰🇾`, 363 `cf`: `🇨🇫`, 364 `td`: `🇹🇩`, 365 `cl`: `🇨🇱`, 366 `cn`: `🇨🇳`, 367 `cx`: `🇨🇽`, 368 `cc`: `🇨🇨`, 369 `co`: `🇨🇴`, 370 `km`: `🇰🇲`, 371 `cd`: `🇨🇩`, 372 `cg`: `🇨🇬`, 373 `ck`: `🇨🇰`, 374 `cr`: `🇨🇷`, 375 `ci`: `🇨🇮`, 376 `hr`: `🇭🇷`, 377 `cu`: `🇨🇺`, 378 `cw`: `🇨🇼`, 379 `cy`: `🇨🇾`, 380 `cz`: `🇨🇿`, 381 `dk`: `🇩🇰`, 382 `dj`: `🇩🇯`, 383 `dm`: `🇩🇲`, 384 `do`: `🇩🇴`, 385 `ec`: `🇪🇨`, 386 `eg`: `🇪🇬`, 387 `sv`: `🇸🇻`, 388 `gq`: `🇬🇶`, 389 `er`: `🇪🇷`, 390 `ee`: 
`🇪🇪`, 391 `sz`: `🇸🇿`, 392 `et`: `🇪🇹`, 393 `eu`: `🇪🇺`, 394 `fk`: `🇫🇰`, 395 `fo`: `🇫🇴`, 396 `fj`: `🇫🇯`, 397 `fi`: `🇫🇮`, 398 `fr`: `🇫🇷`, 399 `gf`: `🇬🇫`, 400 `pf`: `🇵🇫`, 401 `tf`: `🇹🇫`, 402 `ga`: `🇬🇦`, 403 `gm`: `🇬🇲`, 404 `ge`: `🇬🇪`, 405 `de`: `🇩🇪`, 406 `gh`: `🇬🇭`, 407 `gi`: `🇬🇮`, 408 `gr`: `🇬🇷`, 409 `gl`: `🇬🇱`, 410 `gd`: `🇬🇩`, 411 `gp`: `🇬🇵`, 412 `gu`: `🇬🇺`, 413 `gt`: `🇬🇹`, 414 `gg`: `🇬🇬`, 415 `gn`: `🇬🇳`, 416 `gw`: `🇬🇼`, 417 `gy`: `🇬🇾`, 418 `ht`: `🇭🇹`, 419 `hm`: `🇭🇲`, 420 `va`: `🇻🇦`, 421 `hn`: `🇭🇳`, 422 `hk`: `🇭🇰`, 423 `hu`: `🇭🇺`, 424 `is`: `🇮🇸`, 425 `in`: `🇮🇳`, 426 `id`: `🇮🇩`, 427 `ir`: `🇮🇷`, 428 `iq`: `🇮🇶`, 429 `ie`: `🇮🇪`, 430 `im`: `🇮🇲`, 431 `il`: `🇮🇱`, 432 `it`: `🇮🇹`, 433 `jm`: `🇯🇲`, 434 `jp`: `🇯🇵`, 435 `je`: `🇯🇪`, 436 `jo`: `🇯🇴`, 437 `kz`: `🇰🇿`, 438 `ke`: `🇰🇪`, 439 `ki`: `🇰🇮`, 440 `kp`: `🇰🇵`, 441 `kr`: `🇰🇷`, 442 `kw`: `🇰🇼`, 443 `kg`: `🇰🇬`, 444 `la`: `🇱🇦`, 445 `lv`: `🇱🇻`, 446 `lb`: `🇱🇧`, 447 `ls`: `🇱🇸`, 448 `lr`: `🇱🇷`, 449 `ly`: `🇱🇾`, 450 `li`: `🇱🇮`, 451 `lt`: `🇱🇹`, 452 `lu`: `🇱🇺`, 453 `mo`: `🇲🇴`, 454 `mk`: `🇲🇰`, 455 `mg`: `🇲🇬`, 456 `mw`: `🇲🇼`, 457 `my`: `🇲🇾`, 458 `mv`: `🇲🇻`, 459 `ml`: `🇲🇱`, 460 `mt`: `🇲🇹`, 461 `mh`: `🇲🇭`, 462 `mq`: `🇲🇶`, 463 `mr`: `🇲🇷`, 464 `mu`: `🇲🇺`, 465 `yt`: `🇾🇹`, 466 `mx`: `🇲🇽`, 467 `fm`: `🇫🇲`, 468 `md`: `🇲🇩`, 469 `mc`: `🇲🇨`, 470 `mn`: `🇲🇳`, 471 `me`: `🇲🇪`, 472 `ms`: `🇲🇸`, 473 `ma`: `🇲🇦`, 474 `mz`: `🇲🇿`, 475 `mm`: `🇲🇲`, 476 `na`: `🇳🇦`, 477 `nr`: `🇳🇷`, 478 `np`: `🇳🇵`, 479 `nl`: `🇳🇱`, 480 `nc`: `🇳🇨`, 481 `nz`: `🇳🇿`, 482 `ni`: `🇳🇮`, 483 `ne`: `🇳🇪`, 484 `ng`: `🇳🇬`, 485 `nu`: `🇳🇺`, 486 `nf`: `🇳🇫`, 487 `mp`: `🇲🇵`, 488 `no`: `🇳🇴`, 489 `om`: `🇴🇲`, 490 `pk`: `🇵🇰`, 491 `pw`: `🇵🇼`, 492 `ps`: `🇵🇸`, 493 `pa`: `🇵🇦`, 494 `pg`: `🇵🇬`, 495 `py`: `🇵🇾`, 496 `pe`: `🇵🇪`, 497 `ph`: `🇵🇭`, 498 `pn`: `🇵🇳`, 499 `pl`: `🇵🇱`, 500 `pt`: `🇵🇹`, 501 `pr`: `🇵🇷`, 502 `qa`: `🇶🇦`, 503 `re`: `🇷🇪`, 504 `ro`: `🇷🇴`, 505 `ru`: `🇷🇺`, 506 `rw`: `🇷🇼`, 507 `bl`: `🇧🇱`, 508 `sh`: `🇸🇭`, 509 `kn`: `🇰🇳`, 510 `lc`: `🇱🇨`, 511 `mf`: `🇲🇫`, 512 `pm`: `🇵🇲`, 513 `vc`: `🇻🇨`, 514 `ws`: `🇼🇸`, 515 `sm`: 
`🇸🇲`, 516 `st`: `🇸🇹`, 517 `sa`: `🇸🇦`, 518 `sn`: `🇸🇳`, 519 `rs`: `🇷🇸`, 520 `sc`: `🇸🇨`, 521 `sl`: `🇸🇱`, 522 `sg`: `🇸🇬`, 523 `sx`: `🇸🇽`, 524 `sk`: `🇸🇰`, 525 `si`: `🇸🇮`, 526 `sb`: `🇸🇧`, 527 `so`: `🇸🇴`, 528 `za`: `🇿🇦`, 529 `gs`: `🇬🇸`, 530 `ss`: `🇸🇸`, 531 `es`: `🇪🇸`, 532 `lk`: `🇱🇰`, 533 `sd`: `🇸🇩`, 534 `sr`: `🇸🇷`, 535 `sj`: `🇸🇯`, 536 `se`: `🇸🇪`, 537 `ch`: `🇨🇭`, 538 `sy`: `🇸🇾`, 539 `tw`: `🇹🇼`, 540 `tj`: `🇹🇯`, 541 `tz`: `🇹🇿`, 542 `th`: `🇹🇭`, 543 `tl`: `🇹🇱`, 544 `tg`: `🇹🇬`, 545 `tk`: `🇹🇰`, 546 `to`: `🇹🇴`, 547 `tt`: `🇹🇹`, 548 `tn`: `🇹🇳`, 549 `tr`: `🇹🇷`, 550 `tm`: `🇹🇲`, 551 `tc`: `🇹🇨`, 552 `tv`: `🇹🇻`, 553 `ug`: `🇺🇬`, 554 `ua`: `🇺🇦`, 555 `ae`: `🇦🇪`, 556 `gb`: `🇬🇧`, 557 `um`: `🇺🇲`, 558 `us`: `🇺🇸`, 559 `uy`: `🇺🇾`, 560 `uz`: `🇺🇿`, 561 `vu`: `🇻🇺`, 562 `ve`: `🇻🇪`, 563 `vn`: `🇻🇳`, 564 `vg`: `🇻🇬`, 565 `vi`: `🇻🇮`, 566 `wf`: `🇼🇫`, 567 `eh`: `🇪🇭`, 568 `ye`: `🇾🇪`, 569 `zm`: `🇿🇲`, 570 `zw`: `🇿🇼`, 571 `afg`: `🇦🇫`, 572 `ala`: `🇦🇽`, 573 `alb`: `🇦🇱`, 574 `dza`: `🇩🇿`, 575 `asm`: `🇦🇸`, 576 `and`: `🇦🇩`, 577 `ago`: `🇦🇴`, 578 `aia`: `🇦🇮`, 579 `ata`: `🇦🇶`, 580 `atg`: `🇦🇬`, 581 `arg`: `🇦🇷`, 582 `arm`: `🇦🇲`, 583 `abw`: `🇦🇼`, 584 `aus`: `🇦🇺`, 585 `aut`: `🇦🇹`, 586 `aze`: `🇦🇿`, 587 `bhs`: `🇧🇸`, 588 `bhr`: `🇧🇭`, 589 `bgd`: `🇧🇩`, 590 `brb`: `🇧🇧`, 591 `blr`: `🇧🇾`, 592 `bel`: `🇧🇪`, 593 `blz`: `🇧🇿`, 594 `ben`: `🇧🇯`, 595 `bmu`: `🇧🇲`, 596 `btn`: `🇧🇹`, 597 `bol`: `🇧🇴`, 598 `bes`: `🇧🇶`, 599 `bih`: `🇧🇦`, 600 `bwa`: `🇧🇼`, 601 `bvt`: `🇧🇻`, 602 `bra`: `🇧🇷`, 603 `iot`: `🇮🇴`, 604 `brn`: `🇧🇳`, 605 `bgr`: `🇧🇬`, 606 `bfa`: `🇧🇫`, 607 `bdi`: `🇧🇮`, 608 `cpv`: `🇨🇻`, 609 `khm`: `🇰🇭`, 610 `cmr`: `🇨🇲`, 611 `can`: `🇨🇦`, 612 `cym`: `🇰🇾`, 613 `caf`: `🇨🇫`, 614 `tcd`: `🇹🇩`, 615 `chl`: `🇨🇱`, 616 `chn`: `🇨🇳`, 617 `cxr`: `🇨🇽`, 618 `cck`: `🇨🇨`, 619 `col`: `🇨🇴`, 620 `com`: `🇰🇲`, 621 `cod`: `🇨🇩`, 622 `cog`: `🇨🇬`, 623 `cok`: `🇨🇰`, 624 `cri`: `🇨🇷`, 625 `civ`: `🇨🇮`, 626 `hrv`: `🇭🇷`, 627 `cub`: `🇨🇺`, 628 `cuw`: `🇨🇼`, 629 `cyp`: `🇨🇾`, 630 `cze`: `🇨🇿`, 631 `dnk`: `🇩🇰`, 632 `dji`: `🇩🇯`, 633 `dma`: `🇩🇲`, 634 `dom`: `🇩🇴`, 635 `ecu`: `🇪🇨`, 636 
`egy`: `🇪🇬`, 637 `slv`: `🇸🇻`, 638 `gnq`: `🇬🇶`, 639 `eri`: `🇪🇷`, 640 `est`: `🇪🇪`, 641 `swz`: `🇸🇿`, 642 `eth`: `🇪🇹`, 643 `flk`: `🇫🇰`, 644 `fro`: `🇫🇴`, 645 `fji`: `🇫🇯`, 646 `fin`: `🇫🇮`, 647 `fra`: `🇫🇷`, 648 `guf`: `🇬🇫`, 649 `pyf`: `🇵🇫`, 650 `atf`: `🇹🇫`, 651 `gab`: `🇬🇦`, 652 `gmb`: `🇬🇲`, 653 `geo`: `🇬🇪`, 654 `deu`: `🇩🇪`, 655 `gha`: `🇬🇭`, 656 `gib`: `🇬🇮`, 657 `grc`: `🇬🇷`, 658 `grl`: `🇬🇱`, 659 `grd`: `🇬🇩`, 660 `glp`: `🇬🇵`, 661 `gum`: `🇬🇺`, 662 `gtm`: `🇬🇹`, 663 `ggy`: `🇬🇬`, 664 `gin`: `🇬🇳`, 665 `gnb`: `🇬🇼`, 666 `guy`: `🇬🇾`, 667 `hti`: `🇭🇹`, 668 `hmd`: `🇭🇲`, 669 `vat`: `🇻🇦`, 670 `hnd`: `🇭🇳`, 671 `hkg`: `🇭🇰`, 672 `hun`: `🇭🇺`, 673 `isl`: `🇮🇸`, 674 `ind`: `🇮🇳`, 675 `idn`: `🇮🇩`, 676 `irn`: `🇮🇷`, 677 `irq`: `🇮🇶`, 678 `irl`: `🇮🇪`, 679 `imn`: `🇮🇲`, 680 `isr`: `🇮🇱`, 681 `ita`: `🇮🇹`, 682 `jam`: `🇯🇲`, 683 `jpn`: `🇯🇵`, 684 `jey`: `🇯🇪`, 685 `jor`: `🇯🇴`, 686 `kaz`: `🇰🇿`, 687 `ken`: `🇰🇪`, 688 `kir`: `🇰🇮`, 689 `prk`: `🇰🇵`, 690 `kor`: `🇰🇷`, 691 `kwt`: `🇰🇼`, 692 `kgz`: `🇰🇬`, 693 `lao`: `🇱🇦`, 694 `lva`: `🇱🇻`, 695 `lbn`: `🇱🇧`, 696 `lso`: `🇱🇸`, 697 `lbr`: `🇱🇷`, 698 `lby`: `🇱🇾`, 699 `lie`: `🇱🇮`, 700 `ltu`: `🇱🇹`, 701 `lux`: `🇱🇺`, 702 `mac`: `🇲🇴`, 703 `mkd`: `🇲🇰`, 704 `mdg`: `🇲🇬`, 705 `mwi`: `🇲🇼`, 706 `mys`: `🇲🇾`, 707 `mdv`: `🇲🇻`, 708 `mli`: `🇲🇱`, 709 `mlt`: `🇲🇹`, 710 `mhl`: `🇲🇭`, 711 `mtq`: `🇲🇶`, 712 `mrt`: `🇲🇷`, 713 `mus`: `🇲🇺`, 714 `myt`: `🇾🇹`, 715 `mex`: `🇲🇽`, 716 `fsm`: `🇫🇲`, 717 `mda`: `🇲🇩`, 718 `mco`: `🇲🇨`, 719 `mng`: `🇲🇳`, 720 `mne`: `🇲🇪`, 721 `msr`: `🇲🇸`, 722 `mar`: `🇲🇦`, 723 `moz`: `🇲🇿`, 724 `mmr`: `🇲🇲`, 725 `nam`: `🇳🇦`, 726 `nru`: `🇳🇷`, 727 `npl`: `🇳🇵`, 728 `nld`: `🇳🇱`, 729 `ncl`: `🇳🇨`, 730 `nzl`: `🇳🇿`, 731 `nic`: `🇳🇮`, 732 `ner`: `🇳🇪`, 733 `nga`: `🇳🇬`, 734 `niu`: `🇳🇺`, 735 `nfk`: `🇳🇫`, 736 `mnp`: `🇲🇵`, 737 `nor`: `🇳🇴`, 738 `omn`: `🇴🇲`, 739 `pak`: `🇵🇰`, 740 `plw`: `🇵🇼`, 741 `pse`: `🇵🇸`, 742 `pan`: `🇵🇦`, 743 `png`: `🇵🇬`, 744 `pry`: `🇵🇾`, 745 `per`: `🇵🇪`, 746 `phl`: `🇵🇭`, 747 `pcn`: `🇵🇳`, 748 `pol`: `🇵🇱`, 749 `prt`: `🇵🇹`, 750 `pri`: `🇵🇷`, 751 `qat`: `🇶🇦`, 752 `reu`: `🇷🇪`, 753 `rou`: 
`🇷🇴`, 754 `rus`: `🇷🇺`, 755 `rwa`: `🇷🇼`, 756 `blm`: `🇧🇱`, 757 `shn`: `🇸🇭`, 758 `kna`: `🇰🇳`, 759 `lca`: `🇱🇨`, 760 `maf`: `🇲🇫`, 761 `spm`: `🇵🇲`, 762 `vct`: `🇻🇨`, 763 `wsm`: `🇼🇸`, 764 `smr`: `🇸🇲`, 765 `stp`: `🇸🇹`, 766 `sau`: `🇸🇦`, 767 `sen`: `🇸🇳`, 768 `srb`: `🇷🇸`, 769 `syc`: `🇸🇨`, 770 `sle`: `🇸🇱`, 771 `sgp`: `🇸🇬`, 772 `sxm`: `🇸🇽`, 773 `svk`: `🇸🇰`, 774 `svn`: `🇸🇮`, 775 `slb`: `🇸🇧`, 776 `som`: `🇸🇴`, 777 `zaf`: `🇿🇦`, 778 `sgs`: `🇬🇸`, 779 `ssd`: `🇸🇸`, 780 `esp`: `🇪🇸`, 781 `lka`: `🇱🇰`, 782 `sdn`: `🇸🇩`, 783 `sur`: `🇸🇷`, 784 `sjm`: `🇸🇯`, 785 `swe`: `🇸🇪`, 786 `che`: `🇨🇭`, 787 `syr`: `🇸🇾`, 788 `twn`: `🇹🇼`, 789 `tjk`: `🇹🇯`, 790 `tza`: `🇹🇿`, 791 `tha`: `🇹🇭`, 792 `tls`: `🇹🇱`, 793 `tgo`: `🇹🇬`, 794 `tkl`: `🇹🇰`, 795 `ton`: `🇹🇴`, 796 `tto`: `🇹🇹`, 797 `tun`: `🇹🇳`, 798 `tur`: `🇹🇷`, 799 `tkm`: `🇹🇲`, 800 `tca`: `🇹🇨`, 801 `tuv`: `🇹🇻`, 802 `uga`: `🇺🇬`, 803 `ukr`: `🇺🇦`, 804 `are`: `🇦🇪`, 805 `gbr`: `🇬🇧`, 806 `uae`: `🇦🇪`, 807 `umi`: `🇺🇲`, 808 `usa`: `🇺🇸`, 809 `ury`: `🇺🇾`, 810 `uzb`: `🇺🇿`, 811 `vut`: `🇻🇺`, 812 `ven`: `🇻🇪`, 813 `vnm`: `🇻🇳`, 814 `vgb`: `🇻🇬`, 815 `vir`: `🇻🇮`, 816 `wlf`: `🇼🇫`, 817 `esh`: `🇪🇭`, 818 `yem`: `🇾🇪`, 819 `zmb`: `🇿🇲`, 820 `zwe`: `🇿🇼`, 821 } 822 823 func symbols(w writer, r io.Reader, names []string) error { 824 if len(names) == 0 { 825 names = make([]string, 0, len(name2symbol)) 826 for k := range name2symbol { 827 names = append(names, k) 828 } 829 sort.Strings(names) 830 831 for _, name := range names { 832 w.WriteString(name) 833 w.WriteByte('\t') 834 w.WriteString(name2symbol[name]) 835 if err := endLine(w); err != nil { 836 return err 837 } 838 } 839 return nil 840 } 841 842 nerr := 0 843 for _, name := range names { 844 key := name 845 key = strings.ReplaceAll(key, `-`, ``) 846 key = strings.ReplaceAll(key, `_`, ``) 847 848 sym, ok := name2symbol[key] 849 if !ok { 850 showError(errors.New(`symbols: no symbol named ` + name)) 851 nerr++ 852 continue 853 } 854 855 w.WriteString(sym) 856 if err := endLine(w); err != nil { 857 return err 858 } 859 } 860 861 if nerr > 0 { 
862 return multipleErrors{} 863 } 864 return nil 865 } File: box/timer.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 "os" 7 "os/exec" 8 "os/signal" 9 "strconv" 10 "sync" 11 "time" 12 ) 13 14 const ( 15 timerSpaces = ` ` 16 17 // clear has enough spaces in it to cover any chronograph output 18 clear = "\r" + timerSpaces + timerSpaces + timerSpaces + "\r" 19 ) 20 21 // timer runs a live timer, showing the time elapsed 22 func timer(w writer, r io.Reader, args []string) error { 23 if len(args) > 0 { 24 return chronoRunTask(w, r, args[0], args[1:]) 25 } else { 26 return chronograph(w, r, nil) 27 } 28 } 29 30 // chronoRunTask handles running the timer tool in `subtask-mode` 31 func chronoRunTask(w writer, r io.Reader, name string, args []string) error { 32 cmd := exec.Command(name, args...) 33 cmd.Stdin = r 34 35 stdout, err := cmd.StdoutPipe() 36 if err != nil { 37 return err 38 } 39 defer stdout.Close() 40 41 stderr, err := cmd.StderrPipe() 42 if err != nil { 43 return err 44 } 45 defer stderr.Close() 46 47 if err := cmd.Start(); err != nil { 48 return err 49 } 50 51 if err := chronograph(w, stdout, stderr); err != nil { 52 return err 53 } 54 55 if err := cmd.Wait(); err != nil { 56 return err 57 } 58 return justQuit{cmd.ProcessState.ExitCode()} 59 } 60 61 // readLines is run twice asynchronously, so that both stdin and stderr lines 62 // are handled independently, which matters when running a subtask 63 func readLines(r io.Reader, lines chan []byte) error { 64 defer close(lines) 65 66 // when not handling a subtask, this func will be called with a nil 67 // reader, since without a subtask, there's no stderr to read from 68 if r == nil { 69 return nil 70 } 71 72 const gb = 1024 * 1024 * 1024 73 sc := bufio.NewScanner(r) 74 sc.Buffer(nil, 8*gb) 75 76 for sc.Scan() { 77 // interesting: trying to filter out needlessly-chatty pipes 78 // doesn't work as intended when done here, but is fine when 79 // done at both receiving ends in the big select statement 80 lines <- 
sc.Bytes() 81 } 82 return sc.Err() 83 } 84 85 // chronograph runs a live chronograph, showing the time elapsed: 2 input 86 // sources for lines are handled concurrently, one destined for the app's 87 // stdout, the other for the app's stderr, without interfering with the 88 // chronograph lines, which also show on stderr 89 func chronograph(w writer, stdout io.Reader, stderr io.Reader) error { 90 start := time.Now() 91 t := time.NewTicker(100 * time.Millisecond) 92 startChronoLine(start, start) 93 94 stopped := make(chan os.Signal, 1) 95 defer close(stopped) 96 signal.Notify(stopped, os.Interrupt) 97 98 errors := make(chan error) 99 var waitAllLines sync.WaitGroup 100 waitAllLines.Add(2) 101 102 outLines := make(chan []byte) 103 go func() { 104 defer waitAllLines.Done() 105 errors <- readLines(stdout, outLines) 106 }() 107 108 errLines := make(chan []byte) 109 go func() { 110 defer waitAllLines.Done() 111 errors <- readLines(stderr, errLines) 112 }() 113 114 quit := make(chan struct{}) 115 defer close(quit) 116 117 go func() { 118 waitAllLines.Wait() 119 close(errors) 120 quit <- struct{}{} 121 }() 122 123 for { 124 select { 125 case now := <-t.C: 126 os.Stderr.WriteString(clear) 127 startChronoLine(start, now) 128 129 case line := <-outLines: 130 if line == nil { 131 // filter out junk for needlessly-chatty pipes, while still 132 // keeping actual empty lines 133 continue 134 } 135 136 // write-order of the next 3 steps matters, to avoid mixing 137 // up lines since stdout and stderr lines can show up together 138 os.Stderr.WriteString(clear) 139 w.Write(line) 140 _, err := w.WriteString("\n") 141 startChronoLine(start, time.Now()) 142 if err != nil { 143 endChronoLine(start) 144 return err 145 } 146 147 case line := <-errLines: 148 if line == nil { 149 // filter out junk for needlessly-chatty pipes, while still 150 // keeping actual empty lines 151 continue 152 } 153 154 // write-order of the next 3 steps matters, to avoid mixing 155 // up lines since stdout and 
stderr lines can show up together 156 os.Stderr.WriteString(clear) 157 os.Stderr.Write(line) 158 _, err := os.Stderr.WriteString("\n") 159 startChronoLine(start, time.Now()) 160 if err != nil { 161 endChronoLine(start) 162 return err 163 } 164 165 case err := <-errors: 166 if err == nil { 167 continue 168 } 169 os.Stderr.WriteString(clear) 170 showError(err) 171 startChronoLine(start, time.Now()) 172 173 case <-quit: 174 endChronoLine(start) 175 return justQuit{0} 176 177 case <-stopped: 178 t.Stop() 179 endChronoLine(start) 180 return justQuit{255} 181 } 182 } 183 } 184 185 func startChronoLine(start, now time.Time) { 186 var buf [64]byte 187 dt := now.Sub(start) 188 189 os.Stderr.Write(time.Time{}.Add(dt).AppendFormat(buf[:0], `15:04:05`)) 190 os.Stderr.Write([]byte(` `)) 191 os.Stderr.Write(now.AppendFormat(buf[:0], `2006-01-02 15:04:05 Jan Mon`)) 192 } 193 194 func endChronoLine(start time.Time) { 195 var buf [64]byte 196 secs := time.Since(start).Seconds() 197 198 os.Stderr.Write([]byte(` `)) 199 os.Stderr.Write(strconv.AppendFloat(buf[:0], secs, 'f', 4, 64)) 200 os.Stderr.Write([]byte(" seconds\n")) 201 } File: box/tools.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "compress/bzip2" 7 "compress/gzip" 8 "crypto/rand" 9 "crypto/sha1" 10 "crypto/sha256" 11 "crypto/sha512" 12 "encoding/base64" 13 "encoding/csv" 14 "encoding/hex" 15 "encoding/json" 16 "errors" 17 "hash" 18 "io" 19 "io/fs" 20 "math" 21 "os" 22 "os/exec" 23 "path/filepath" 24 "regexp" 25 "runtime" 26 "sort" 27 "strconv" 28 "strings" 29 "sync" 30 "time" 31 "unicode" 32 "unicode/utf8" 33 ) 34 35 func abs(w writer, r io.Reader) error { 36 return loopLines(r, func(i int, line []byte) error { 37 abs, err := filepath.Abs(string(line)) 38 if err != nil { 39 return err 40 } 41 42 w.WriteString(abs) 43 return endLine(w) 44 }) 45 } 46 47 // args emits each string given to it on its own output line, ignoring any 48 // input 49 func args(w writer, r io.Reader, args []string) error { 50 for _, s := range 
args { 51 w.WriteString(s) 52 if err := endLine(w); err != nil { 53 return err 54 } 55 } 56 return nil 57 } 58 59 func base(w writer, i int, line []byte) error { 60 w.WriteString(filepath.Base(string(line))) 61 return endLine(w) 62 } 63 64 // base64encode encodes input bytes into a base64-encoded line 65 func base64encode(w writer, r io.Reader) error { 66 enc := base64.NewEncoder(base64.StdEncoding, w) 67 if _, err := io.Copy(enc, r); err != nil { 68 return err 69 } 70 if err := enc.Close(); err != nil { 71 return err 72 } 73 return endLine(w) 74 } 75 76 // begin emits the few strings given as their own lines, before emitting back 77 // all input lines 78 func begin(w writer, r io.Reader, args []string) error { 79 for _, s := range args { 80 w.WriteString(s) 81 if err := endLine(w); err != nil { 82 return err 83 } 84 } 85 86 return loopLines(r, func(i int, line []byte) error { 87 return writeln(w, line) 88 }) 89 } 90 91 // beginTSV emits a line of tab-separated values (TSV), before emitting back 92 // all input lines 93 func beginTSV(w writer, r io.Reader, args []string) error { 94 for i, s := range args { 95 if i > 0 { 96 w.WriteByte('\t') 97 } 98 w.WriteString(s) 99 } 100 101 if err := endLine(w); err != nil { 102 return err 103 } 104 105 return loopLines(r, func(i int, line []byte) error { 106 return writeln(w, line) 107 }) 108 } 109 110 // book lays out input lines on 2 columns, the same way books do it 111 func book(w writer, r io.Reader, pageHeight int) error { 112 if pageHeight < 2 { 113 return errors.New(`page height must be at least 2`) 114 } 115 116 var buf []byte 117 var lines [][]byte 118 loopLines(r, func(i int, line []byte) error { 119 buf = buf[:0] 120 line = bytes.TrimRightFunc(line, unicode.IsSpace) 121 buf = expandTabs(buf, line, defaultTabstop) 122 lines = append(lines, append([]byte{}, buf...)) 123 return nil 124 }) 125 126 var maxWidths [2]int 127 innerHeight := pageHeight - 1 128 129 rest := lines 130 for len(rest) > 0 { 131 w := 
findMaxWidth(limitSlice(rest, innerHeight)) 132 if maxWidths[0] < w { 133 maxWidths[0] = w 134 } 135 rest = advanceSlice(rest, innerHeight) 136 137 w = findMaxWidth(limitSlice(rest, innerHeight)) 138 if maxWidths[1] < w { 139 maxWidths[1] = w 140 } 141 rest = advanceSlice(rest, innerHeight) 142 } 143 144 endColumnSeparator := strings.TrimRight(columnSeparator, ` `) 145 bottom := strings.Repeat(`·`, maxWidths[0]+3+maxWidths[1]) 146 147 for i := 0; len(lines) > 0; i++ { 148 if i > 0 { 149 w.WriteString(bottom) 150 w.WriteByte('\n') 151 } 152 153 for j, left := range limitSlice(lines, innerHeight) { 154 w.Write(left) 155 if bytes.Contains(left, []byte{'\x1b', '['}) { 156 w.WriteString("\x1b[0m") 157 } 158 159 writeSpaces(w, maxWidths[0]-findWidth(left)) 160 161 right := indexSlice(lines, j+innerHeight) 162 if len(right) > 0 { 163 w.WriteString(columnSeparator) 164 } else { 165 w.WriteString(endColumnSeparator) 166 } 167 168 w.Write(right) 169 if bytes.Contains(right, []byte{'\x1b', '['}) { 170 w.WriteString("\x1b[0m") 171 } 172 173 if err := endLine(w); err != nil { 174 return err 175 } 176 } 177 178 lines = advanceSlice(lines, 2*innerHeight) 179 } 180 181 return nil 182 } 183 184 // breatheHeader adds an extra empty line after the first one (the header), 185 // then adds an extra empty line every few 186 func breatheHeader(w writer, r io.Reader, args []string) error { 187 every, err := optionalInteger(args, 5) 188 if err != nil { 189 return err 190 } 191 192 if every < 1 { 193 return loopLines(r, func(i int, line []byte) error { 194 if i == 1 { 195 w.WriteByte('\n') 196 } 197 return writeln(w, line) 198 }) 199 } 200 201 return loopLines(r, func(i int, line []byte) error { 202 if (i-1)%every == 0 { 203 w.WriteByte('\n') 204 } 205 return writeln(w, line) 206 }) 207 } 208 209 // breatheLines adds an extra empty line every few 210 func breatheLines(w writer, r io.Reader, args []string) error { 211 every, err := optionalInteger(args, 5) 212 if err != nil { 213 return err 
// byteFreq tallies how many times each of the 256 possible byte values shows
// up in the input, emitting a 2-column TSV table with a header line and a
// line for each byte value, even those which never showed up
func byteFreq(w writer, r io.Reader) error {
	var buf [bufferSize]byte
	var tally [256]uint64

	for {
		// count the bytes read before checking the error, since reads can
		// give both data and an error at the same time
		got, err := r.Read(buf[:])
		for _, b := range buf[:got] {
			tally[b]++
		}

		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
	}

	w.WriteString("byte\tcount\n")
	for i, c := range tally {
		writeInt(w, i)
		w.WriteByte('\t')
		// the input buffer is no longer needed, so it's reused as scratch
		// space to format each count
		w.Write(strconv.AppendUint(buf[:0], c, 10))
		if err := endLine(w); err != nil {
			return err
		}
	}
	return nil
}

// choplf ignores the last line-feed from the input, if present
func choplf(w writer, r io.Reader) error {
	return loopLines(r, func(i int, line []byte) error {
		// line-feeds go before every line except the first, which means
		// the last line is never followed by one
		if i > 0 {
			w.WriteByte('\n')
		}
		return write(w, line)
	})
}

// compose runs a chain of commands asynchronously, but still keeping their
// implied I/O order; the first argument is the separator used to split all
// later arguments into the commands to chain
func compose(w writer, r io.Reader, args []string) error {
	if len(args) == 0 {
		return wrongToolArgs{`expected at least 1 argument`, ``}
	}

	sep := args[0]
	cmds := splitSliceNonEmpty(args[1:], sep)
	return composeAsyncRec(w, r, cmds)
}

// composeAsyncRec handles the recursion for the `compose` tool; the code to
// merge the error-channels looks slightly `simplifiable`, but trying to do
// so can lead to ugly concurrency bugs; things seem to work, so keep as is;
// the first command reads from r and writes into a pipe, which all later
// commands read from by way of recursion, with the last command in the
// chain writing to w; the result is the error from whichever side failed
// first, or nil when the whole chain succeeded
func composeAsyncRec(w io.Writer, r io.Reader, cmds [][]string) error {
	if len(cmds) == 0 {
		return nil
	}

	// check, even if func splitSliceNonEmpty is supposed to prevent this
	if len(cmds[0]) == 0 {
		return errors.New(`internal error: unexpected empty command-slice`)
	}

	// handle the last subcommand/tool in the chain
	if len(cmds) == 1 {
		return run(w, r, cmds[0])
	}

	// handle the steps along the way, gathering a single error result
	errch := make(chan error)
	defer close(errch)

	go func() {
		// the current task writes into curpipe, while all later tasks
		// read from nextpipe, which is the other end of the same pipe
		nextpipe, curpipe := io.Pipe()

		curerrch := make(chan error)
		defer close(curerrch)
		nexterrch := make(chan error)
		defer close(nexterrch)

		// start the current task asynchronously
		go func() {
			// directly using io.Pipe can lead to an astonishing number of
			// empty/tiny byte-slices being passed around channels, which
			// slows things down considerably when dealing with many data
			w := bufio.NewWriter(curpipe)

			// ensure clean-up in case current tool panics
			defer curpipe.Close()
			defer w.Flush()

			// make sequence of all steps explicit, to ensure things are
			// happening in the correct order
			err := run(w, r, cmds[0])
			w.Flush()
			curpipe.Close()
			curerrch <- err
		}()

		// start all later tasks asynchronously, by way of recursion
		go func() {
			// ensure clean-up in case later tools panic
			defer nextpipe.Close()

			// make sequence of all steps explicit
			err := composeAsyncRec(w, nextpipe, cmds[1:])
			nextpipe.Close()
			nexterrch <- err
		}()

		// wait for completion of all tasks, in any order: this is done
		// by waiting for 2 tasks, since the latter of these handles all
		// later tasks, by way of recursion

		select {
		case err := <-curerrch:
			if err != nil {
				// wait for the later tasks to end, ignoring their error
				<-nexterrch

				// return error from the current task
				errch <- err
				return
			}

			// wait for later tasks to end, and pass along their result
			errch <- <-nexterrch
			return

		case err := <-nexterrch:
			// try to explicitly end the current task sooner; multiple
			// closures of io.Pipe `values` are allowed: their Close
			// funcs do nothing when called after the first time
			curpipe.Close()

			if err != nil {
				// wait for current task to end, ignoring its error
				<-curerrch

				// return error from later tasks
				errch <- err
				return
			}

			// wait for current task to end, and pass along its result
			errch <- <-curerrch
			return
		}
	}()

	// wait for a definitive error/result from the async tasks
	return <-errch
}
// wait for current task to end, ignoring its error 363 <-curerrch 364 365 // return error from later tasks 366 errch <- err 367 return 368 } 369 370 // wait for current task to end 371 errch <- <-curerrch 372 return 373 } 374 }() 375 376 // wait for a definitive error/result from the async tasks 377 return <-errch 378 } 379 380 // datauri encodes input bytes as a base64-encoded data-URI line 381 func datauri(w writer, r io.Reader, args []string) error { 382 mime, err := optionalString(args, ``) 383 if err != nil { 384 return err 385 } 386 387 if len(mime) > 0 { 388 mime = strings.TrimSpace(mime) 389 mime = strings.ToLower(mime) 390 if s, ok := mimeAliases[mime]; ok { 391 mime = s 392 } 393 394 enc := base64.NewEncoder(base64.StdEncoding, w) 395 defer enc.Close() 396 397 w.WriteString(`data:`) 398 w.WriteString(mime) 399 w.WriteString(`;base64,`) 400 if _, err := io.Copy(enc, r); err != nil { 401 return noMoreOutput{} 402 } 403 return endLine(w) 404 } 405 406 var buf [4 * 1024]byte 407 n, err := r.Read(buf[:]) 408 if err != nil && err != io.EOF { 409 return err 410 } 411 412 start := buf[:n] 413 mime, ok := guessMIME(start) 414 if !ok { 415 return errors.New(`can't autodetect the MIME-type`) 416 } 417 418 w.WriteString(`data:`) 419 w.WriteString(mime) 420 w.WriteString(`;base64,`) 421 422 enc := base64.NewEncoder(base64.StdEncoding, w) 423 defer enc.Close() 424 425 all := io.MultiReader(bytes.NewReader(start), r) 426 if _, err := io.Copy(enc, all); err != nil { 427 return noMoreOutput{} 428 } 429 return endLine(w) 430 } 431 432 // debase64 decodes base64-encoded input data, including data-URIs 433 func debase64(w writer, r io.Reader) error { 434 var buf [4 * 1024]byte 435 n, err := r.Read(buf[:]) 436 if err != nil && err != io.EOF { 437 return err 438 } 439 440 start := buf[:n] 441 if bytes.HasPrefix(start, []byte{'d', 'a', 't', 'a', ':'}) { 442 marker := []byte{';', 'b', 'a', 's', 'e', '6', '4', ','} 443 if i := bytes.Index(start, marker); i >= 0 { 444 start = 
start[i+len(marker):] 445 } 446 } 447 448 all := io.MultiReader(bytes.NewReader(start), r) 449 enc := base64.NewDecoder(base64.StdEncoding, all) 450 _, err = io.Copy(w, enc) 451 return err 452 } 453 454 // debz decompresses bzip2-encoded input bytes 455 func debz(w writer, r io.Reader) error { 456 dec := bzip2.NewReader(r) 457 _, err := w.ReadFrom(dec) 458 return err 459 } 460 461 // decsv turns CSV data into a JSONS (JSON Strings) array; the only other 462 // type of value is null, reserved for missing trailing row-items 463 func decsv(w writer, r io.Reader) error { 464 rr := csv.NewReader(r) 465 rr.LazyQuotes = true 466 rr.ReuseRecord = true 467 rr.FieldsPerRecord = -1 468 469 var keys []string 470 471 for i := 0; true; i++ { 472 row, err := rr.Read() 473 474 if err == io.EOF { 475 if i > 1 { 476 w.WriteByte(']') 477 return endLine(w) 478 } 479 return nil 480 } 481 482 if err != nil { 483 return err 484 } 485 486 if i == 0 { 487 keys = make([]string, 0, len(row)) 488 for _, s := range row { 489 c := string(append([]byte{}, s...)) 490 keys = append(keys, c) 491 } 492 continue 493 } 494 495 if i == 1 { 496 w.WriteByte('[') 497 } else { 498 err = w.WriteByte(',') 499 if err != nil { 500 return noMoreOutput{} 501 } 502 } 503 504 if len(row) > len(keys) { 505 return errors.New(`data-row has more items than the header`) 506 } 507 508 w.WriteByte('{') 509 for i, s := range row { 510 if i > 0 { 511 w.WriteByte(',') 512 } 513 w.WriteByte('"') 514 writeInnerStringJSON(w, keys[i]) 515 w.WriteString(`":"`) 516 writeInnerStringJSON(w, s) 517 w.WriteByte('"') 518 } 519 520 for i := len(row); i < len(keys); i++ { 521 if i > 0 { 522 w.WriteByte(',') 523 } 524 w.WriteByte('"') 525 writeInnerStringJSON(w, keys[i]) 526 w.WriteString(`":null`) 527 } 528 w.WriteByte('}') 529 } 530 531 return nil 532 } 533 534 // degz decompresses gzip-encoded input bytes 535 func degz(w writer, r io.Reader) error { 536 dec, err := gzip.NewReader(r) 537 if err != nil { 538 return err 539 } 540 defer 
dec.Close() 541 _, err = w.ReadFrom(dec) 542 return err 543 } 544 545 // delay waits the given number of seconds before emitting back each line 546 // from the input 547 func delay(w writer, r io.Reader, seconds float64) error { 548 dt := time.Duration(seconds * float64(time.Second)) 549 return loopLines(r, func(i int, line []byte) error { 550 defer w.Flush() 551 time.Sleep(dt) 552 return writeln(w, line) 553 }) 554 } 555 556 // detab expands tabs using the tabstop count given 557 func detab(w writer, r io.Reader, args []string) error { 558 tabstop, err := optionalInteger(args, defaultTabstop) 559 if err != nil { 560 return err 561 } 562 563 var buf []byte 564 return loopLines(r, func(i int, line []byte) error { 565 buf = buf[:0] 566 buf = expandTabs(buf, line, tabstop) 567 return writeln(w, buf) 568 }) 569 } 570 571 func dir(w writer, i int, line []byte) error { 572 w.WriteString(filepath.Dir(string(line))) 573 return endLine(w) 574 } 575 576 // div divides the 2 numbers given to it; if given just 1 number, it shows 577 // that number's reciprocal 578 func div(w writer, r io.Reader, args []string) error { 579 switch len(args) { 580 case 1: 581 x, err := strconv.ParseFloat(args[0], 64) 582 if err != nil { 583 return err 584 } 585 writeFloat(w, 1/x) 586 return endLine(w) 587 588 case 2: 589 x, err := strconv.ParseFloat(args[0], 64) 590 if err != nil { 591 return err 592 } 593 y, err := strconv.ParseFloat(args[1], 64) 594 if err != nil { 595 return err 596 } 597 598 if x > y { 599 x, y = y, x 600 } 601 602 writeFloat(w, x/y) 603 endLine(w) 604 writeFloat(w, y/x) 605 endLine(w) 606 writeFloat(w, 1-x/y) 607 return endLine(w) 608 609 default: 610 return wrongToolArgs{`expected 1 or 2 args`, ``} 611 } 612 } 613 614 // drop ignores all substring occurrences of all the substrings given, in the 615 // order given 616 func drop(w writer, r io.Reader, args []string) error { 617 var left []byte 618 avoid := make([][]byte, 0, len(args)) 619 for _, s := range args { 620 avoid = 
append(avoid, []byte(s)) 621 } 622 623 return loopLines(r, func(i int, line []byte) error { 624 left = left[:0] 625 left = append(left, line...) 626 l := left 627 628 for _, s := range avoid { 629 for { 630 i := bytes.Index(l, s) 631 if i < 0 { 632 break 633 } 634 635 copy(l[i:len(l)-len(s)], l[i+len(s):]) 636 l = l[:len(l)-len(s)] 637 } 638 } 639 640 return writeln(w, l) 641 }) 642 } 643 644 // end emits back all input lines, and then emits the few strings given as 645 // their own lines 646 func end(w writer, r io.Reader, args []string) error { 647 err := loopLines(r, func(i int, line []byte) error { 648 return writeln(w, line) 649 }) 650 651 if err != nil { 652 return err 653 } 654 655 for _, s := range args { 656 w.WriteString(s) 657 if err := endLine(w); err != nil { 658 return err 659 } 660 } 661 return nil 662 } 663 664 // endTSV emits back all input lines, and then emits a line of tab-separated 665 // values (TSV) 666 func endTSV(w writer, r io.Reader, args []string) error { 667 err := loopLines(r, func(i int, line []byte) error { 668 return writeln(w, line) 669 }) 670 671 if err != nil { 672 return err 673 } 674 675 for i, s := range args { 676 if i > 0 { 677 w.WriteByte('\t') 678 } 679 w.WriteString(s) 680 } 681 return endLine(w) 682 } 683 684 // folders recursively finds all files in all the folder names given 685 func files(w writer, r io.Reader, args []string) error { 686 return walk(args, func(path string, d fs.DirEntry, err error) error { 687 if err != nil || d.IsDir() { 688 return err 689 } 690 691 w.WriteString(path) 692 return endLine(w) 693 }) 694 } 695 696 // folders recursively finds all subfolders in all the folder names given 697 func folders(w writer, r io.Reader, args []string) error { 698 return walk(args, func(path string, d fs.DirEntry, err error) error { 699 if err != nil || !d.IsDir() { 700 return err 701 } 702 703 w.WriteString(path) 704 return endLine(w) 705 }) 706 } 707 708 // first limits input up to its first few lines 709 func 
first(w writer, r io.Reader, args []string) error { 710 max, err := optionalInteger(args, 1) 711 if err != nil { 712 return err 713 } 714 715 if max < 1 { 716 return nil 717 } 718 719 return loopLines(r, func(i int, line []byte) error { 720 if i >= max { 721 return noMoreOutput{} 722 } 723 return writeln(w, line) 724 }) 725 } 726 727 // gz gzips input bytes 728 func gz(w writer, r io.Reader) error { 729 enc := gzip.NewWriter(w) 730 defer enc.Flush() 731 if _, err := io.Copy(enc, r); err != nil { 732 return noMoreOutput{} 733 } 734 return nil 735 } 736 737 // hexify turns input bytes into a line of ASCII-valued hexadecimal pairs 738 func hexify(w writer, r io.Reader) error { 739 enc := hex.NewEncoder(w) 740 if _, err := io.Copy(enc, r); err != nil { 741 return noMoreOutput{} 742 } 743 return endLine(w) 744 } 745 746 // hold reads all input bytes, holding everything, and starts copying them 747 // all into the main output only after the last input byte was read 748 func hold(w writer, r io.Reader) error { 749 all, err := io.ReadAll(r) 750 if err != nil { 751 return err 752 } 753 w.Write(all) 754 return nil 755 } 756 757 // identity copies input to its main output verbatim 758 func identity(w writer, r io.Reader) error { 759 io.Copy(w, r) 760 return nil 761 } 762 763 // indent adds extra leading spaces to each non-empty input line 764 func indent(w writer, r io.Reader, args []string) error { 765 spaces, err := optionalInteger(args, 2) 766 if err != nil { 767 return err 768 } 769 770 return loopLines(r, func(i int, line []byte) error { 771 if len(line) == 0 { 772 return endLine(w) 773 } 774 writeSpaces(w, spaces) 775 return writeln(w, line) 776 }) 777 } 778 779 // items emits all field/word-like items from all input lines, each match 780 // shown on its own output line 781 func items(w writer, i int, line []byte) error { 782 if i := bytes.IndexByte(line, '\t'); i >= 0 { 783 return loopTSV(line, func(i int, s []byte) error { 784 return writeln(w, s) 785 }) 786 } 787 788 
return loopItems(line, func(i int, s []byte) error { 789 return writeln(w, s) 790 }) 791 } 792 793 // join emits all input lines into a single output line, using the separator 794 // given to it between items 795 func join(w writer, r io.Reader, separator string) error { 796 empty := true 797 err := loopLines(r, func(i int, line []byte) error { 798 empty = false 799 if i > 0 { 800 w.WriteString(separator) 801 } 802 return write(w, line) 803 }) 804 805 if err != nil { 806 return err 807 } 808 809 if empty { 810 return nil 811 } 812 return endLine(w) 813 } 814 815 // jsonl turns valid JSON into JSON Lines, a variant of JSON often used for 816 // logging purposes, as it's very amenable to line-based streaming, and where 817 // each line is valid JSON on its own 818 func jsonl(w writer, r io.Reader) error { 819 dec := json.NewDecoder(r) 820 dec.UseNumber() 821 822 t, err := dec.Token() 823 if err == io.EOF { 824 return errInputEarlyEnd 825 } 826 if err != nil { 827 return err 828 } 829 830 if t == json.Delim('[') { 831 return jsonlTopArray(w, dec) 832 } 833 834 if err := jsonlToken(w, t, dec); err != nil { 835 return err 836 } 837 return endLine(w) 838 } 839 840 // jsonlTopArray handles top-level arrays for func jsonl 841 func jsonlTopArray(w writer, dec *json.Decoder) error { 842 for { 843 t, err := dec.Token() 844 if err != nil { 845 return err 846 } 847 848 if t == json.Delim(']') { 849 return nil 850 } 851 852 if err := jsonlToken(w, t, dec); err != nil { 853 return err 854 } 855 if err := endLine(w); err != nil { 856 return err 857 } 858 } 859 } 860 861 // jsonlArray handles non-top-level arrays for func jsonl 862 func jsonlArray(w writer, dec *json.Decoder) error { 863 w.WriteByte('[') 864 865 for i := 0; true; i++ { 866 t, err := dec.Token() 867 if err != nil { 868 return err 869 } 870 871 if t == json.Delim(']') { 872 w.WriteByte(']') 873 return nil 874 } 875 876 if i > 0 { 877 w.WriteByte(',') 878 } 879 if err := jsonlToken(w, t, dec); err != nil { 880 return 
err 881 } 882 } 883 884 return nil 885 } 886 887 // jsonlObject handles objects for func jsonl 888 func jsonlObject(w writer, dec *json.Decoder) error { 889 w.WriteByte('{') 890 891 for i := 0; true; i++ { 892 t, err := dec.Token() 893 if err != nil { 894 return err 895 } 896 897 if t == json.Delim('}') { 898 w.WriteByte('}') 899 return nil 900 } 901 902 s, ok := t.(string) 903 if !ok { 904 return errInvalidToken 905 } 906 907 if i > 0 { 908 w.WriteByte(',') 909 } 910 911 w.WriteByte('"') 912 writeInnerStringJSON(w, s) 913 w.WriteString(`":`) 914 915 t, err = dec.Token() 916 if err != nil { 917 return err 918 } 919 if err := jsonlToken(w, t, dec); err != nil { 920 return err 921 } 922 } 923 924 return nil 925 } 926 927 // jsonlToken handles values/recursion for func jsonl 928 func jsonlToken(w writer, t json.Token, dec *json.Decoder) error { 929 switch t := t.(type) { 930 case nil: 931 w.WriteString(`null`) 932 return nil 933 934 case bool: 935 if t { 936 w.WriteString(`true`) 937 } else { 938 w.WriteString(`false`) 939 } 940 return nil 941 942 case json.Number: 943 w.WriteString(t.String()) 944 return nil 945 946 case string: 947 w.WriteByte('"') 948 writeInnerStringJSON(w, t) 949 w.WriteByte('"') 950 return nil 951 952 case json.Delim: 953 switch t { 954 case json.Delim('['): 955 return jsonlArray(w, dec) 956 case json.Delim('{'): 957 return jsonlObject(w, dec) 958 default: 959 return errInvalidToken 960 } 961 962 default: 963 return errInvalidToken 964 } 965 } 966 967 // junk emits the number of pseudo-random bytes given 968 func junk(w writer, r io.Reader, n int) error { 969 var buf [bufferSize]byte 970 971 for n > 0 { 972 size := n 973 if size > bufferSize { 974 size = bufferSize 975 } 976 chunk := buf[:size] 977 978 got, err := rand.Read(chunk) 979 if err != nil { 980 return err 981 } 982 983 _, err = w.Write(chunk) 984 if err != nil { 985 return nil 986 } 987 988 n -= got 989 } 990 991 return nil 992 } 993 994 // last limits input up to its last few lines 
995 func last(w writer, r io.Reader, args []string) error { 996 max, err := optionalInteger(args, 1) 997 if err != nil { 998 return err 999 } 1000 1001 if max < 1 { 1002 return nil 1003 } 1004 1005 if max == 1 { 1006 empty := true 1007 var last []byte 1008 1009 loopLines(r, func(i int, line []byte) error { 1010 empty = false 1011 last = line 1012 return nil 1013 }) 1014 1015 if empty { 1016 return nil 1017 } 1018 return writeln(w, last) 1019 } 1020 1021 index := 0 1022 var last [][]byte 1023 1024 loopLines(r, func(i int, line []byte) error { 1025 if len(last) < max { 1026 last = append(last, line) 1027 return nil 1028 } 1029 1030 last[index] = line 1031 index = (index + 1) % len(last) 1032 return nil 1033 }) 1034 1035 if len(last) == 0 { 1036 return nil 1037 } 1038 1039 for _, line := range last[index:] { 1040 if err := writeln(w, line); err != nil { 1041 return err 1042 } 1043 } 1044 for _, line := range last[:index] { 1045 if err := writeln(w, line); err != nil { 1046 return err 1047 } 1048 } 1049 return nil 1050 } 1051 1052 // leak helps debug pipes, by copying all input lines both to stderr, as well 1053 // as its main output 1054 func leak(w writer, r io.Reader, args []string) error { 1055 style, err := pickStyle(args, `plain`) 1056 if err != nil { 1057 return err 1058 } 1059 1060 if bytes.Equal(style, []byte("\x1b[0m")) { 1061 style = nil 1062 } 1063 1064 return loopLines(r, func(i int, line []byte) error { 1065 if style == nil { 1066 os.Stderr.Write(line) 1067 } else { 1068 os.Stderr.Write(style) 1069 os.Stderr.Write(line) 1070 os.Stderr.WriteString("\x1b[0m") 1071 } 1072 os.Stderr.Write([]byte{'\n'}) 1073 return writeln(w, line) 1074 }) 1075 } 1076 1077 // limit caps data to the max number of bytes given 1078 func limit(w writer, r io.Reader, maxBytes int) error { 1079 max := int64(maxBytes) 1080 if _, err := io.Copy(w, io.LimitReader(r, max)); err != nil { 1081 return noMoreOutput{} 1082 } 1083 return nil 1084 } 1085 1086 // lines ignored a leading UTF-8 
BOM, if present, turns all CRLF byte-pairs 1087 // into single line-feed bytes, and ensures the final line always ends with 1088 // a line-feed; an empty (0 bytes) input results in an empty output 1089 func lines(w writer, i int, line []byte) error { 1090 return writeln(w, line) 1091 } 1092 1093 // lineup joins input lines via tabs, up to the number given: whenever that 1094 // number is exceeded, a new output line starts; when not given a number, or 1095 // when that number is 0 or negative, all input lines are tab-joined into a 1096 // single output line 1097 func lineup(w writer, r io.Reader, args []string) error { 1098 size, err := optionalInteger(args, 0) 1099 if err != nil { 1100 return err 1101 } 1102 1103 empty := true 1104 err = loopLines(r, func(i int, line []byte) error { 1105 empty = false 1106 if i == 0 { 1107 return write(w, line) 1108 } 1109 1110 if i%size == 0 && size > 0 { 1111 w.WriteByte('\n') 1112 } else { 1113 w.WriteByte('\t') 1114 } 1115 return write(w, line) 1116 }) 1117 1118 if err != nil { 1119 return err 1120 } 1121 1122 if empty { 1123 return nil 1124 } 1125 return endLine(w) 1126 } 1127 1128 const linksPattern = `https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*` 1129 1130 var linksRegexp = regexp.MustCompile(linksPattern) 1131 1132 // links gets all hyperlink-type substrings from the input, each match shown 1133 // on its own line 1134 func links(w writer, i int, line []byte) error { 1135 for { 1136 loc := linksRegexp.FindIndex(line) 1137 if loc == nil { 1138 return nil 1139 } 1140 1141 w.Write(line[loc[0]:loc[1]]) 1142 if err := endLine(w); err != nil { 1143 return err 1144 } 1145 1146 line = line[loc[1]:] 1147 } 1148 } 1149 1150 // lower lowercases all symbols in all lines 1151 func lower(w writer, i int, line []byte) error { 1152 // for len(line) > 0 { 1153 // r, size := utf8.DecodeRune(line) 1154 // w.WriteRune(unicode.ToLower(r)) 1155 // line = line[size:] 1156 // } 1157 // return endline(w) 1158 1159 var buf [64]byte 1160 
chunk := buf[:0] 1161 1162 for len(line) > 0 { 1163 r, size := utf8.DecodeRune(line) 1164 line = line[size:] 1165 1166 if cap(buf) < len(chunk)+size { 1167 w.Write(chunk) 1168 chunk = buf[:0] 1169 } 1170 1171 chunk = utf8.AppendRune(chunk, unicode.ToLower(r)) 1172 } 1173 1174 if len(chunk) > 0 { 1175 w.Write(chunk) 1176 } 1177 return endLine(w) 1178 } 1179 1180 // mimeDetect, when successful, shows the MIME-type auto-detected from the 1181 // first few input bytes 1182 func mimeDetect(w writer, r io.Reader) error { 1183 var buf [4 * 1024]byte 1184 n, err := r.Read(buf[:]) 1185 if err != nil && err != io.EOF { 1186 return err 1187 } 1188 1189 start := buf[:n] 1190 mime, ok := guessMIME(start) 1191 if !ok { 1192 if n < 24 { 1193 return errors.New(`too few bytes to autodetect the MIME-type`) 1194 } 1195 return errors.New(`can't autodetect the MIME-type`) 1196 } 1197 1198 w.WriteString(mime) 1199 return endLine(w) 1200 } 1201 1202 // n numbers lines using the optional starting counter given, which is 1 by 1203 // default; each output line starts with the current counter, followed by a 1204 // tab, ending with the original input line 1205 func n(w writer, r io.Reader, args []string) error { 1206 start, err := optionalInteger(args, 1) 1207 if err != nil { 1208 return err 1209 } 1210 1211 return loopLines(r, func(i int, line []byte) error { 1212 writeInt(w, start+i) 1213 w.WriteByte('\t') 1214 return writeln(w, line) 1215 }) 1216 } 1217 1218 // nj stands for `nice json`, and renders JSON data as ANSI-styled text 1219 func nj(w writer, r io.Reader) error { 1220 dec := json.NewDecoder(r) 1221 dec.UseNumber() 1222 1223 t, err := dec.Token() 1224 if err != nil { 1225 return err 1226 } 1227 1228 if err := niceJSON(w, dec, t, 0, 0); err != nil { 1229 return err 1230 } 1231 return endLine(w) 1232 } 1233 1234 // niceJSON handles recursion for func nj 1235 func niceJSON(w writer, r *json.Decoder, t json.Token, pre, level int) error { 1236 writeSpaces(w, pre) 1237 1238 switch t := 
t.(type) { 1239 case nil: 1240 w.WriteString("\x1b[38;5;248mnull\x1b[0m") 1241 return nil 1242 1243 case bool: 1244 if t { 1245 w.WriteString("\x1b[38;5;74mtrue\x1b[0m") 1246 } else { 1247 w.WriteString("\x1b[38;5;74mfalse\x1b[0m") 1248 } 1249 return nil 1250 1251 case json.Number: 1252 w.WriteString("\x1b[38;5;29m") 1253 w.WriteString(t.String()) 1254 w.WriteString("\x1b[0m") 1255 return nil 1256 1257 case string: 1258 w.WriteString("\x1b[38;5;248m\"\x1b[0m") 1259 // w.WriteString("\x1b[38;5;248m\"\x1b[38;5;24m") 1260 writeInnerStringJSON(w, t) 1261 w.WriteString("\x1b[38;5;248m\"\x1b[0m") 1262 return nil 1263 1264 case json.Delim: 1265 switch t { 1266 case json.Delim('['): 1267 return niceArrayJSON(w, r, level) 1268 case json.Delim('{'): 1269 return niceObjectJSON(w, r, level) 1270 default: 1271 return errors.New(`unsupported JSON delimiter`) 1272 } 1273 1274 default: 1275 return errors.New(`unsupported JSON token type`) 1276 } 1277 } 1278 1279 // writeInnerStringJSON helps the `nj` tool JSON-encode strings more quickly 1280 func writeInnerStringJSON(w writer, s string) { 1281 needsEscaping := false 1282 for _, r := range s { 1283 if '#' <= r && r <= '~' && r != '\\' { 1284 continue 1285 } 1286 if r == ' ' || r == '!' 
|| unicode.IsLetter(r) { 1287 continue 1288 } 1289 1290 needsEscaping = true 1291 break 1292 } 1293 1294 if !needsEscaping { 1295 w.WriteString(s) 1296 return 1297 } 1298 1299 outer, err := json.Marshal(s) 1300 if err != nil { 1301 return 1302 } 1303 inner := outer[1 : len(outer)-1] 1304 w.Write(inner) 1305 } 1306 1307 // niceArrayJSON handles arrays for func niceJSON 1308 func niceArrayJSON(w writer, r *json.Decoder, level int) error { 1309 w.WriteString("\x1b[38;5;248m[\x1b[0m") 1310 1311 for i := 0; true; i++ { 1312 t, err := r.Token() 1313 if err != nil { 1314 return err 1315 } 1316 1317 if t == json.Delim(']') { 1318 if i == 0 { 1319 w.WriteString("\x1b[38;5;248m]\x1b[0m") 1320 return nil 1321 } 1322 1323 w.WriteString("\x1b[38;5;248m,\x1b[0m") 1324 w.WriteByte('\n') 1325 writeSpaces(w, level) 1326 w.WriteString("\x1b[38;5;248m]\x1b[0m") 1327 return nil 1328 } 1329 1330 if i > 0 { 1331 w.WriteString("\x1b[38;5;248m,\x1b[0m") 1332 } 1333 1334 if err := endLine(w); err != nil { 1335 return err 1336 } 1337 1338 if err := niceJSON(w, r, t, level+2, level+2); err != nil { 1339 return err 1340 } 1341 } 1342 1343 return nil 1344 } 1345 1346 // niceObjectJSON handles objects for func niceJSON 1347 func niceObjectJSON(w writer, r *json.Decoder, level int) error { 1348 w.WriteString("\x1b[38;5;248m{\x1b[0m") 1349 1350 for i := 0; true; i++ { 1351 t, err := r.Token() 1352 if err != nil { 1353 return err 1354 } 1355 1356 if t == json.Delim('}') { 1357 if i == 0 { 1358 w.WriteString("\x1b[38;5;248m]\x1b[0m") 1359 return nil 1360 } 1361 1362 w.WriteString("\x1b[38;5;248m,\x1b[0m") 1363 w.WriteByte('\n') 1364 writeSpaces(w, level) 1365 w.WriteString("\x1b[38;5;248m}\x1b[0m") 1366 return nil 1367 } 1368 1369 if i > 0 { 1370 w.WriteString("\x1b[38;5;248m,\x1b[0m") 1371 } 1372 1373 if err := endLine(w); err != nil { 1374 return err 1375 } 1376 1377 writeSpaces(w, level+2) 1378 key, ok := t.(string) 1379 if !ok { 1380 return errors.New(`object key isn't a string`) 1381 } 1382 
w.WriteString("\x1b[38;5;248m\"\x1b[38;5;99m") 1383 writeInnerStringJSON(w, key) 1384 w.WriteString("\x1b[38;5;248m\":\x1b[0m ") 1385 1386 t, err = r.Token() 1387 if err != nil { 1388 return err 1389 } 1390 if err := niceJSON(w, r, t, 0, level+2); err != nil { 1391 return err 1392 } 1393 } 1394 1395 return nil 1396 } 1397 1398 func noEmpty(w writer, i int, line []byte) error { 1399 if len(line) == 0 { 1400 return nil 1401 } 1402 1403 w.Write(line) 1404 return endLine(w) 1405 } 1406 1407 // nothing reads nothing and writes nothing, and thus gets nothing done 1408 func nothing(w writer, r io.Reader) error { 1409 return nil 1410 } 1411 1412 // now shows the current date, time, month-name, and weekday-name 1413 func now(w writer, r io.Reader, args []string) error { 1414 var buf [72]byte 1415 s := time.Now().AppendFormat(buf[:0], `2006-01-02 15:04:05 Jan Mon`) 1416 return writeln(w, s) 1417 } 1418 1419 // numbers shows all detected numbers from all input lines, one match per 1420 // output line 1421 func numbers(w writer, i int, line []byte) error { 1422 handle := func(i int, s []byte) error { 1423 f, err := strconv.ParseFloat(string(s), 64) 1424 if err != nil || math.IsNaN(f) || math.IsInf(f, 0) { 1425 return nil 1426 } 1427 return writeln(w, s) 1428 } 1429 1430 if i := bytes.IndexByte(line, '\t'); i >= 0 { 1431 return loopTSV(line, handle) 1432 } 1433 return loopItems(line, handle) 1434 } 1435 1436 // plain ignores all ANSI-style sequences 1437 func plain(w writer, i int, line []byte) error { 1438 err := loopPlain(line, func(i int, s []byte) error { 1439 if _, err := w.Write(s); err != nil { 1440 return noMoreOutput{} 1441 } 1442 return nil 1443 }) 1444 1445 if err != nil { 1446 return err 1447 } 1448 return endLine(w) 1449 } 1450 1451 // primes shows the first few prime numbers 1452 func primes(w writer, r io.Reader, count int) error { 1453 if count > 0 { 1454 w.WriteString("2\n") 1455 count-- 1456 } 1457 1458 next: 1459 for n := uint64(3); count > 0; n += 2 { 1460 
max := uint64(math.Sqrt(float64(n))) 1461 for div := uint64(3); div <= max; div++ { 1462 if n%div == 0 { 1463 continue next 1464 } 1465 } 1466 1467 // current value is a prime number 1468 count-- 1469 var buf [32]byte 1470 w.Write(strconv.AppendUint(buf[:0], n, 10)) 1471 if err := endLine(w); err != nil { 1472 return err 1473 } 1474 } 1475 1476 return nil 1477 } 1478 1479 func prun(w writer, r io.Reader) error { 1480 var commands []string 1481 1482 err := loopLines(r, func(i int, line []byte) error { 1483 line = bytes.TrimSpace(line) 1484 if len(line) > 0 { 1485 commands = append(commands, string(line)) 1486 } 1487 return nil 1488 }) 1489 1490 if err != nil { 1491 return err 1492 } 1493 1494 if len(commands) == 0 { 1495 return nil 1496 } 1497 1498 errors := 0 1499 var mut sync.Mutex 1500 var done sync.WaitGroup 1501 permissions := make(chan struct{}, runtime.NumCPU()) 1502 1503 for _, cmd := range commands { 1504 permissions <- struct{}{} 1505 done.Add(1) 1506 1507 go func(cmd string) { 1508 defer func() { 1509 done.Done() 1510 <-permissions 1511 }() 1512 1513 var out bytes.Buffer 1514 var err bytes.Buffer 1515 c := exec.Command(`sh`, `-c`, cmd) 1516 c.Stdin = bytes.NewReader([]byte{}) 1517 c.Stdout = &out 1518 c.Stderr = &err 1519 problem := c.Run() 1520 1521 mut.Lock() 1522 defer mut.Unlock() 1523 1524 if problem != nil { 1525 errors++ 1526 os.Stderr.WriteString("\x1b[31m") 1527 os.Stderr.WriteString(problem.Error()) 1528 os.Stderr.WriteString("\x1b[0m\n") 1529 } 1530 1531 b := err.Bytes() 1532 os.Stderr.Write(b) 1533 if len(b) > 0 && b[len(b)-1] != '\n' { 1534 os.Stderr.WriteString("\n") 1535 } 1536 1537 b = out.Bytes() 1538 os.Stdout.Write(b) 1539 if len(b) > 0 && b[len(b)-1] != '\n' { 1540 os.Stdout.WriteString("\n") 1541 } 1542 }(cmd) 1543 } 1544 1545 done.Wait() 1546 1547 if errors > 0 { 1548 return multipleErrors{} 1549 } 1550 return nil 1551 } 1552 1553 // rangeLines keeps only the input lines whose 1-based index is between the 1554 // 2 line-numbers 
given, inclusively 1555 func rangeLines(w writer, r io.Reader, args []string) error { 1556 switch len(args) { 1557 case 1: 1558 start, err := parseInteger(args[0]) 1559 if err != nil { 1560 return wrongToolArgs{err.Error(), ``} 1561 } 1562 skip := start - 1 1563 1564 return loopLines(r, func(i int, line []byte) error { 1565 if i < skip { 1566 return nil 1567 } 1568 return writeln(w, line) 1569 }) 1570 1571 case 2: 1572 start, err := parseInteger(args[0]) 1573 if err != nil { 1574 return wrongToolArgs{err.Error(), ``} 1575 } 1576 skip := start - 1 1577 1578 stop, err := parseInteger(args[1]) 1579 if err != nil { 1580 return wrongToolArgs{err.Error(), ``} 1581 } 1582 1583 if stop < start { 1584 return wrongToolArgs{`"start" number can't be more than "stop"`, ``} 1585 } 1586 1587 return loopLines(r, func(i int, line []byte) error { 1588 if i >= stop { 1589 return noMoreOutput{} 1590 } 1591 if i < skip { 1592 return nil 1593 } 1594 return writeln(w, line) 1595 }) 1596 1597 default: 1598 return wrongToolArgs{`expected either 1 or 2 integer-like numbers`, ``} 1599 } 1600 } 1601 1602 // realign realigns all items in all lines, according to each column's widest 1603 // item on any line 1604 func realign(w writer, r io.Reader) error { 1605 const gap = 2 1606 var lines [][]byte 1607 var widths []int 1608 1609 loopLines(r, func(i int, line []byte) error { 1610 s := append([]byte{}, line...) 
1611 lines = append(lines, s) 1612 1613 loop := loopItems 1614 if i := bytes.IndexByte(line, '\t'); i >= 0 { 1615 loop = loopTSV 1616 } 1617 1618 return loop(line, func(i int, s []byte) error { 1619 if i >= len(widths) { 1620 widths = append(widths, 0) 1621 } 1622 if w := findWidth(s); widths[i] < w { 1623 widths[i] = w 1624 } 1625 return nil 1626 }) 1627 }) 1628 1629 for _, line := range lines { 1630 loop := loopItems 1631 if i := bytes.IndexByte(line, '\t'); i >= 0 { 1632 loop = loopTSV 1633 } 1634 1635 prevWidth := 0 1636 loop(line, func(i int, s []byte) error { 1637 if i > 0 { 1638 n := widths[i-1] - prevWidth 1639 if n < 0 { 1640 n = 0 1641 } 1642 writeSpaces(w, n+gap) 1643 } 1644 1645 w.Write(s) 1646 prevWidth = findWidth(s) 1647 return nil 1648 }) 1649 1650 if err := endLine(w); err != nil { 1651 return err 1652 } 1653 } 1654 1655 return nil 1656 } 1657 1658 // reproseState implements the behavior of func reprose, via its func 1659 // handleLine; it's a good idea to check if the counter is non-zero 1660 // after the last input line is processed, to emit a final line-feed 1661 // when that's the case 1662 type reproseState struct { 1663 // maxw is the maximum line-width as a rune-count 1664 maxw int 1665 1666 // n is the current output line's rune-count 1667 n int 1668 } 1669 1670 // handleLine lets you process a line of text, possibly emitting it over 1671 // multiple output lines 1672 func (rs *reproseState) handleLine(w writer, line []byte) error { 1673 line = bytes.TrimSpace(line) 1674 if len(line) == 0 { 1675 // emit empty(ish) lines as empty lines 1676 return endLine(w) 1677 } 1678 1679 for len(line) > 0 { 1680 i := bytes.IndexByte(line, ' ') 1681 if i < 0 { 1682 // no more spaces/words 1683 return rs.emitWord(w, line) 1684 } 1685 1686 err := rs.emitWord(w, line[:i]) 1687 if err != nil { 1688 return err 1689 } 1690 1691 // skip past the space right after the current word 1692 line = line[i+1:] 1693 1694 // ignore all leading spaces 1695 for len(line) > 
0 && line[0] == ' ' { 1696 line = line[1:] 1697 } 1698 } 1699 1700 return nil 1701 } 1702 1703 // handleEnd ends the last line, once all input lines have been processed 1704 func (rs *reproseState) handleEnd(w writer) error { 1705 if rs.n > 0 { 1706 rs.n = 0 1707 return endLine(w) 1708 } 1709 return nil 1710 } 1711 1712 // tooMany checks if adding a word with the rune-count given would exceed 1713 // the target max-width for output lines 1714 func (rs reproseState) tooMany(runeCount int) bool { 1715 if rs.n > 0 { 1716 return rs.n+1+runeCount > rs.maxw 1717 } 1718 return rs.n+runeCount > rs.maxw 1719 } 1720 1721 // emitWord is called by func handleLine to emit strings with no spaces 1722 func (rs *reproseState) emitWord(w writer, s []byte) error { 1723 c := findWidth(s) 1724 // c := utf8.RuneCountInString(s) 1725 1726 // end current line if this word in it would exceed the max-rune count 1727 if rs.tooMany(c) { 1728 err := endLine(w) 1729 if err != nil { 1730 return err 1731 } 1732 rs.n = 0 1733 } 1734 1735 // precede all words with a space, except the first one of its line 1736 if rs.n > 0 { 1737 rs.n++ 1738 w.WriteString(` `) 1739 } 1740 1741 // put current word in its own line, if it exceeds the max-rune count 1742 // by itself 1743 if c > rs.maxw { 1744 rs.n = 0 1745 w.Write(s) 1746 return endLine(w) 1747 } 1748 1749 // word fits in the current line, so update the rune-counter 1750 rs.n += c 1751 w.Write(s) 1752 return nil 1753 } 1754 1755 // reprose reflows lines of plain-text prose, trying to emit lines not wider 1756 // than the rune-count given, even if that's not always possible, depending 1757 // on the input lines being processed; when not given a rune-count, the 1758 // default is 80 runes max per line 1759 func reprose(w writer, r io.Reader, args []string) error { 1760 width, err := optionalInteger(args, 80) 1761 if err != nil { 1762 return err 1763 } 1764 1765 var rs reproseState 1766 rs.maxw = width 1767 1768 err = loopLines(r, func(i int, line 
[]byte) error { 1769 return rs.handleLine(w, line) 1770 }) 1771 1772 if err != nil { 1773 return err 1774 } 1775 1776 return rs.handleEnd(w) 1777 } 1778 1779 // restyle starts each input line with an ANSI-style appropriate for the 1780 // style/color-name given to it, then ends each line with an ANSI-style reset 1781 func restyle(w writer, r io.Reader, args []string) error { 1782 if len(args) == 0 { 1783 return wrongToolArgs{`no style name given`, ``} 1784 } 1785 1786 style, err := pickStyle(args, `plain`) 1787 if err != nil { 1788 return err 1789 } 1790 1791 if bytes.Equal(style, []byte("\x1b[0m")) { 1792 return loopLines(r, func(i int, line []byte) error { 1793 return plain(w, i, line) 1794 }) 1795 } 1796 1797 return loopLines(r, func(i int, line []byte) error { 1798 w.Write(style) 1799 w.Write(line) 1800 w.WriteString("\x1b[0m") 1801 return endLine(w) 1802 }) 1803 } 1804 1805 // reuse gives the same input bytes to all its tools in its `compose`-like 1806 // pipe/chain of commands: unlike the `compose` tool, steps in this tool's 1807 // chain are run in sequence, with no overlap 1808 func reuse(w writer, r io.Reader, args []string) error { 1809 if len(args) == 0 { 1810 return wrongToolArgs{`expected at least 1 argument`, ``} 1811 } 1812 1813 sep := args[0] 1814 cmds := splitSliceNonEmpty(args[1:], sep) 1815 1816 input, err := io.ReadAll(r) 1817 if err != nil { 1818 return err 1819 } 1820 1821 for _, cmd := range cmds { 1822 if err := run(w, bytes.NewReader(input), cmd); err != nil { 1823 return err 1824 } 1825 } 1826 return nil 1827 } 1828 1829 // sha1encode turns input bytes into a ASCII-hex SHA-1 checksum line 1830 func sha1encode(w writer, r io.Reader) error { 1831 return hashencode(w, r, sha1.New()) 1832 } 1833 1834 // sha256encode turns input bytes into a ASCII-hex SHA-256 checksum line 1835 func sha256encode(w writer, r io.Reader) error { 1836 return hashencode(w, r, sha256.New()) 1837 } 1838 1839 // sha512encode turns input bytes into a ASCII-hex SHA-512 
checksum line 1840 func sha512encode(w writer, r io.Reader) error { 1841 return hashencode(w, r, sha512.New()) 1842 } 1843 1844 // hashencode is the common logic for several checksum-type tools 1845 func hashencode(w writer, r io.Reader, h hash.Hash) error { 1846 if _, err := io.Copy(h, r); err != nil { 1847 return noMoreOutput{} 1848 } 1849 enc := hex.NewEncoder(w) 1850 enc.Write(h.Sum(nil)) 1851 return endLine(w) 1852 } 1853 1854 // size counts input bytes 1855 func size(w writer, r io.Reader) error { 1856 n, err := io.Copy(io.Discard, r) 1857 if err != nil { 1858 return err 1859 } 1860 writeInt(w, int(n)) 1861 return endLine(w) 1862 } 1863 1864 // skip ignores up to the first given number of input lines 1865 func skip(w writer, r io.Reader, args []string) error { 1866 skip, err := optionalInteger(args, 1) 1867 if err != nil { 1868 return err 1869 } 1870 1871 return loopLines(r, func(i int, line []byte) error { 1872 if i < skip { 1873 return nil 1874 } 1875 return writeln(w, line) 1876 }) 1877 } 1878 1879 // skipLast ignores the last few lines 1880 func skipLast(w writer, r io.Reader, args []string) error { 1881 max, err := optionalInteger(args, 1) 1882 if err != nil { 1883 return err 1884 } 1885 1886 if max < 1 { 1887 return loopLines(r, func(i int, line []byte) error { 1888 return writeln(w, line) 1889 }) 1890 } 1891 1892 index := 0 1893 var last [][]byte 1894 1895 return loopLines(r, func(i int, line []byte) error { 1896 if len(last) < max { 1897 last = append(last, line) 1898 return nil 1899 } 1900 1901 if err := writeln(w, last[index]); err != nil { 1902 return err 1903 } 1904 1905 last[index] = line 1906 index = (index + 1) % len(last) 1907 return nil 1908 }) 1909 } 1910 1911 func soak(w writer, r io.Reader) error { 1912 var all []byte 1913 var chunk [bufferSize]byte 1914 1915 for { 1916 got, err := r.Read(chunk[:]) 1917 if err == io.EOF { 1918 all = append(all, chunk[:got]...) 
1919 break 1920 } 1921 1922 if err != nil { 1923 return err 1924 } 1925 1926 all = append(all, chunk[:got]...) 1927 } 1928 1929 w.Write(all) 1930 return nil 1931 } 1932 1933 func split(w writer, r io.Reader, args []string) error { 1934 if len(args) == 0 { 1935 return loopLines(r, func(i int, line []byte) error { 1936 return items(w, i, line) 1937 }) 1938 } 1939 1940 seps := make([][]byte, 0, len(args)) 1941 for _, a := range args { 1942 seps = append(seps, []byte(a)) 1943 } 1944 1945 return loopLines(r, func(i int, line []byte) error { 1946 for len(line) > 0 { 1947 i, j := indexAny(line, seps) 1948 1949 if i < 0 { 1950 if len(line) == 0 { 1951 return nil 1952 } 1953 w.Write(line) 1954 return endLine(w) 1955 } 1956 1957 if i < j { 1958 w.Write(line[:i]) 1959 if err := endLine(w); err != nil { 1960 return err 1961 } 1962 } 1963 1964 line = line[j:] 1965 } 1966 1967 return nil 1968 }) 1969 } 1970 1971 func splitAny(w writer, r io.Reader, seps string) error { 1972 return split(w, r, strings.Split(seps, ``)) 1973 } 1974 1975 // squeeze aggressively trims input lines, even turning runs of multiple 1976 // spaces into single spaces 1977 func squeeze(w writer, i int, line []byte) error { 1978 for len(line) > 0 { 1979 line = bytes.TrimLeftFunc(line, func(r rune) bool { 1980 return r == ' ' 1981 }) 1982 1983 i := bytes.IndexAny(line, " \t") 1984 if i < 0 { 1985 break 1986 } 1987 1988 w.WriteByte(line[i]) 1989 if err := write(w, line[:i]); err != nil { 1990 return err 1991 } 1992 1993 line = line[i+1:] 1994 } 1995 1996 // handle the last item in its line 1997 return writeln(w, line) 1998 } 1999 2000 // stomp ignores leading/trailing empty lines, and turns runs of empty lines 2001 // into single empty lines 2002 func stomp(w writer, r io.Reader) error { 2003 nlines := 0 2004 empty := false 2005 2006 return loopLines(r, func(i int, line []byte) error { 2007 if len(line) == 0 { 2008 empty = true 2009 return nil 2010 } 2011 2012 if empty && nlines > 0 { 2013 w.WriteByte('\n') 
2014 } 2015 empty = false 2016 2017 nlines++ 2018 return writeln(w, line) 2019 }) 2020 } 2021 2022 // stringsTool detects all ASCII-type byte sequences from the input bytes, 2023 // whether those were intended as ASCII or not 2024 func stringsTool(w writer, r io.Reader) error { 2025 ascii := false 2026 var buf [bufferSize]byte 2027 2028 for { 2029 n, err := r.Read(buf[:]) 2030 if n < 1 { 2031 if err == io.EOF { 2032 err = nil 2033 } 2034 if ascii { 2035 if err == nil { 2036 return endLine(w) 2037 } 2038 endLine(w) 2039 } 2040 return err 2041 } 2042 2043 for _, b := range buf[:n] { 2044 if isSymbolASCII[b] { 2045 ascii = true 2046 w.WriteByte(b) 2047 continue 2048 } 2049 2050 if ascii { 2051 ascii = false 2052 if b == '\n' { 2053 continue 2054 } 2055 if err := endLine(w); err != nil { 2056 return err 2057 } 2058 } 2059 } 2060 } 2061 } 2062 2063 // tally keeps counts of each distinct input-line value, emitting the result 2064 // at the end as TSV lines sorted in reverse order, commonest lines first 2065 func tally(w writer, r io.Reader) error { 2066 tally := make(map[string]int) 2067 loopLines(r, func(i int, line []byte) error { 2068 s := string(line) 2069 tally[s] += 1 2070 return nil 2071 }) 2072 2073 keys := make([]string, 0, len(tally)) 2074 for k := range tally { 2075 keys = append(keys, k) 2076 } 2077 sort.SliceStable(keys, func(i, j int) bool { 2078 return tally[keys[j]] < tally[keys[i]] 2079 }) 2080 2081 w.WriteString("tally\tvalue\n") 2082 for _, k := range keys { 2083 writeInt(w, tally[k]) 2084 w.WriteByte('\t') 2085 w.WriteString(k) 2086 if err := endLine(w); err != nil { 2087 return err 2088 } 2089 } 2090 return nil 2091 } 2092 2093 // teletype simulates the cadence of old teletype machines 2094 func teletype(w writer, i int, line []byte) error { 2095 defer w.Flush() 2096 2097 // make runs of empty lines go by very quickly 2098 if len(line) == 0 { 2099 return endLine(w) 2100 } 2101 2102 // type symbols quickly 2103 for len(line) > 0 { 2104 r, size := 
utf8.DecodeRune(line) 2105 line = line[size:] 2106 time.Sleep(15 * time.Millisecond) 2107 w.WriteRune(r) 2108 w.Flush() 2109 } 2110 2111 // line-feeds from non-empty lines hang on for a bit, for suspense 2112 time.Sleep(500 * time.Millisecond) 2113 return endLine(w) 2114 } 2115 2116 // today shows a line with the current date, including the names of both the 2117 // current month and the current weekday 2118 func today(w writer, r io.Reader, args []string) error { 2119 var buf [32]byte 2120 s := time.Now().AppendFormat(buf[:0], `2006-01-02 Jan Mon`) 2121 return writeln(w, s) 2122 } 2123 2124 // title makes the first symbol in each line uppercase, lowercasing the rest 2125 func title(w writer, i int, line []byte) error { 2126 if len(line) == 0 { 2127 return endLine(w) 2128 } 2129 2130 // uppercase leading symbol, and lowercase all later ones 2131 r, size := utf8.DecodeRune(line) 2132 w.WriteRune(unicode.ToUpper(r)) 2133 return lower(w, i, line[size:]) 2134 } 2135 2136 // topfiles finds all top/surface-level files in all the folder names given 2137 func topfiles(w writer, r io.Reader, args []string) error { 2138 return walktop(args, func(e fs.DirEntry) error { 2139 if e.IsDir() { 2140 return nil 2141 } 2142 w.WriteString(e.Name()) 2143 return endLine(w) 2144 }) 2145 } 2146 2147 // topfiles finds all top/surface-level folders in all the folder names given 2148 func topfolders(w writer, r io.Reader, args []string) error { 2149 return walktop(args, func(e fs.DirEntry) error { 2150 if !e.IsDir() { 2151 return nil 2152 } 2153 w.WriteString(e.Name()) 2154 return endLine(w) 2155 }) 2156 } 2157 2158 // trim ignores leading/trailing whitespace-type symbols in all lines 2159 func trim(w writer, i int, line []byte) error { 2160 return writeln(w, bytes.TrimSpace(line)) 2161 } 2162 2163 // trim ignores trailing whitespace-type symbols in all lines 2164 func trimend(w writer, i int, line []byte) error { 2165 return writeln(w, bytes.TrimRightFunc(line, unicode.IsSpace)) 2166 } 2167 
2168 // tsv emits all tab-separated values from all input lines, each item shown 2169 // on its own output line 2170 func tsv(w writer, i int, line []byte) error { 2171 return loopTSV(line, func(i int, s []byte) error { 2172 return writeln(w, s) 2173 }) 2174 } 2175 2176 // unique avoids emitting the same input line more than once 2177 func unique(w writer, r io.Reader) error { 2178 got := make(map[string]struct{}) 2179 2180 return loopLines(r, func(i int, line []byte) error { 2181 s := string(line) 2182 if _, ok := got[s]; ok { 2183 return nil 2184 } 2185 2186 got[s] = struct{}{} 2187 return writeln(w, line) 2188 }) 2189 } 2190 2191 // urify URI-encodes each input line 2192 func urify(w writer, i int, line []byte) error { 2193 // s := url.PathEscape(string(line)) 2194 // w.WriteString(s) 2195 // return endLine(w) 2196 2197 for len(line) > 0 { 2198 r, size := utf8.DecodeRune(line) 2199 line = line[size:] 2200 2201 if r < 128 && uriUnescapedASCII[r] { 2202 w.WriteByte(byte(r)) 2203 continue 2204 } 2205 2206 const hex = `0123456789ABCDEF` 2207 const l = byte(len(hex)) 2208 w.WriteByte('%') 2209 w.WriteByte(hex[byte(r)/l]) 2210 w.WriteByte(hex[byte(r)%l]) 2211 } 2212 2213 return endLine(w) 2214 } 2215 2216 // wait waits the given number of seconds, before running the tool given 2217 func wait(w writer, r io.Reader, args []string) error { 2218 sec, rest, err := requireLeadingNumber(args) 2219 if err != nil { 2220 return err 2221 } 2222 2223 dt := time.Duration(sec * float64(time.Second)) 2224 time.Sleep(dt) 2225 2226 if len(rest) == 0 { 2227 return nil 2228 } 2229 return run(w, r, rest) 2230 } File: box/utf8.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 "unicode/utf16" 7 ) 8 9 // readBytePairBE gets you a pair of bytes in big-endian (original) order 10 func readBytePairBE(br *bufio.Reader) (byte, byte, error) { 11 a, err := br.ReadByte() 12 if err != nil { 13 return a, 0, err 14 } 15 b, err := br.ReadByte() 16 return a, b, err 17 } 18 19 // readBytePairLE gets you a 
pair of bytes in little-endian order 20 func readBytePairLE(br *bufio.Reader) (byte, byte, error) { 21 a, b, err := readBytePairBE(br) 22 return b, a, err 23 } 24 25 // utf8Tool turns UTF-16 bytes (both kinds) and BOMed UTF-8 bytes into 26 // proper UTF-8 bytes: this is one of the few text-related tools which 27 // keeps CRLF sequences verbatim 28 func utf8Tool(w writer, r io.Reader) error { 29 br := bufio.NewReader(r) 30 31 a, err := br.ReadByte() 32 if err == io.EOF { 33 return nil 34 } 35 if err != nil { 36 return err 37 } 38 39 b, err := br.ReadByte() 40 if err == io.EOF { 41 w.WriteByte(a) 42 return nil 43 } 44 if err != nil { 45 return err 46 } 47 48 // handle potential leading UTF-8 BOM 49 if a == 0xEF && b == 0xBB { 50 c, err := br.ReadByte() 51 if err == io.EOF { 52 w.WriteByte(a) 53 w.WriteByte(b) 54 return nil 55 } 56 57 if err != nil { 58 return err 59 } 60 61 if c != 0xBF { 62 w.WriteByte(a) 63 w.WriteByte(b) 64 w.WriteByte(c) 65 } 66 67 if _, err := io.Copy(w, br); err != nil { 68 return noMoreOutput{} 69 } 70 } 71 72 // handle leading UTF-16 big-endian BOM 73 if a == 0xFE && b == 0xFF { 74 return deUTF16(w, br, readBytePairBE) 75 } 76 77 // handle leading UTF-16 little-endian BOM 78 if a == 0xFF && b == 0xFE { 79 return deUTF16(w, br, readBytePairLE) 80 } 81 82 // handle lack of leading UTF-16 BOM 83 sym := rune(256*int(b) + int(a)) 84 85 if utf16.IsSurrogate(sym) { 86 a, b, err := readBytePairLE(br) 87 if err == io.EOF { 88 return nil 89 } 90 if err != nil { 91 return err 92 } 93 94 next := rune(256*int(a) + int(b)) 95 sym = utf16.DecodeRune(sym, next) 96 } 97 98 w.WriteRune(sym) 99 return deUTF16(w, br, readBytePairLE) 100 } 101 102 // readPairFunc narrows source-code lines for func deUTF16 103 type readPairFunc func(*bufio.Reader) (byte, byte, error) 104 105 // deUTF16 is used by func utf8Tool 106 func deUTF16(w writer, br *bufio.Reader, readPair readPairFunc) error { 107 for { 108 a, b, err := readPair(br) 109 if err == io.EOF { 110 return nil 
111 } 112 if err != nil { 113 return err 114 } 115 116 r := rune(256*int(a) + int(b)) 117 if utf16.IsSurrogate(r) { 118 a, b, err := readPair(br) 119 if err == io.EOF { 120 return nil 121 } 122 if err != nil { 123 return err 124 } 125 126 next := rune(256*int(a) + int(b)) 127 r = utf16.DecodeRune(r, next) 128 } 129 130 if _, err := w.WriteRune(r); err != nil { 131 return noMoreOutput{} 132 } 133 } 134 } File: box/wave.go 1 package main 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "io" 7 "math" 8 "strconv" 9 ) 10 11 // aiff header format 12 // 13 // http://paulbourke.net/dataformats/audio/ 14 // 15 // wav header format 16 // 17 // http://soundfile.sapp.org/doc/WaveFormat/ 18 // http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html 19 // https://docs.fileformat.com/audio/wav/ 20 21 const ( 22 // maxInt helps convert float64 values into int16 ones 23 maxInt = 1<<15 - 1 24 25 // wavIntPCM declares integer PCM sound-data in a wav header 26 wavIntPCM = 1 27 28 // wavFloatPCM declares floating-point PCM sound-data in a wav header 29 wavFloatPCM = 3 30 ) 31 32 type sampleFormat byte 33 34 const ( 35 int16BE sampleFormat = 1 36 int16LE sampleFormat = 2 37 float32BE sampleFormat = 3 38 float32LE sampleFormat = 4 39 ) 40 41 // emitInt16LE writes a 16-bit signed integer in little-endian byte order 42 func emitInt16LE(w io.Writer, f float64) (n int, err error) { 43 // binary.Write(w, binary.LittleEndian, int16(maxInt*f)) 44 var buf [2]byte 45 binary.LittleEndian.PutUint16(buf[:2], uint16(int16(maxInt*f))) 46 return w.Write(buf[:2]) 47 } 48 49 // emitFloat32LE writes a 32-bit float in little-endian byte order 50 // func emitFloat32LE(w io.Writer, f float64) { 51 // var buf [4]byte 52 // binary.LittleEndian.PutUint32(buf[:4], math.Float32bits(float32(f))) 53 // w.Write(buf[:4]) 54 // } 55 56 // wavSettings is an item in the type2wavSettings table 57 type wavSettings struct { 58 Type byte 59 BitsPerSample byte 60 } 61 62 // type2wavSettings encodes values used when 
emitting wav headers 63 var type2wavSettings = map[sampleFormat]wavSettings{ 64 int16LE: {wavIntPCM, 16}, 65 float32LE: {wavFloatPCM, 32}, 66 } 67 68 // waveOutputSettings are format-specific settings which are controlled by the 69 // output-format option on the cmd-line 70 type waveOutputSettings struct { 71 Samples sampleFormat 72 73 // MaxTime is the play duration of the resulting sound 74 MaxTime float64 75 76 // SampleRate is the number of samples per second for all channels 77 SampleRate uint32 78 79 // NumChannels is the number of output channels, either 1 or 2 80 NumChannels byte 81 } 82 83 // emitWaveHeader writes the start of a valid .wav file: since it also starts 84 // the wav data section and emits its size, you only need to write all samples 85 // after calling this func 86 func emitWaveHeader(w io.Writer, cfg waveOutputSettings) error { 87 const fmtChunkSize = 16 88 duration := cfg.MaxTime 89 numchan := uint32(cfg.NumChannels) 90 sampleRate := cfg.SampleRate 91 92 ws, ok := type2wavSettings[cfg.Samples] 93 if !ok { 94 const pre = `internal error: invalid output-format code ` 95 return errors.New(pre + strconv.Itoa(int(cfg.Samples))) 96 } 97 kind := uint16(ws.Type) 98 bps := uint32(ws.BitsPerSample) 99 100 // byte rate 101 br := sampleRate * bps * numchan / 8 102 // data size in bytes 103 dataSize := uint32(float64(br) * duration) 104 // total file size 105 totalSize := uint32(dataSize + 44) 106 107 // general descriptor 108 w.Write([]byte(`RIFF`)) 109 binary.Write(w, binary.LittleEndian, uint32(totalSize)) 110 w.Write([]byte(`WAVE`)) 111 112 // fmt chunk 113 w.Write([]byte(`fmt `)) 114 binary.Write(w, binary.LittleEndian, uint32(fmtChunkSize)) 115 binary.Write(w, binary.LittleEndian, uint16(kind)) 116 binary.Write(w, binary.LittleEndian, uint16(numchan)) 117 binary.Write(w, binary.LittleEndian, uint32(sampleRate)) 118 binary.Write(w, binary.LittleEndian, uint32(br)) 119 binary.Write(w, binary.LittleEndian, uint16(bps*numchan/8)) 120 binary.Write(w, 
binary.LittleEndian, uint16(bps)) 121 122 // start data chunk 123 w.Write([]byte(`data`)) 124 binary.Write(w, binary.LittleEndian, uint32(dataSize)) 125 return nil 126 } 127 128 func tone(w writer, r io.Reader, args []string) error { 129 switch len(args) { 130 case 0: 131 return emitTone(w, 2.0, 440) 132 133 case 1: 134 x, err := strconv.ParseFloat(args[0], 64) 135 if err != nil { 136 return err 137 } 138 return emitTone(w, x, 440) 139 140 case 2: 141 x, err := strconv.ParseFloat(args[0], 64) 142 if err != nil { 143 return err 144 } 145 y, err := strconv.ParseFloat(args[1], 64) 146 if err != nil { 147 return err 148 } 149 return emitTone(w, x, y) 150 151 default: 152 return wrongToolArgs{`expected no more than 2 args`, ``} 153 } 154 } 155 156 func emitTone(w writer, seconds float64, frequency float64) error { 157 const rate = 48_000 158 const tau = 2 * math.Pi 159 160 badSec := math.IsNaN(seconds) || math.IsInf(seconds, 0) 161 badFreq := math.IsNaN(frequency) || math.IsInf(frequency, 0) 162 if badSec || badFreq { 163 return wrongToolArgs{`invalid numbers`, ``} 164 } 165 166 emitWaveHeader(w, waveOutputSettings{ 167 Samples: int16LE, 168 MaxTime: math.Max(seconds, 0.0), 169 SampleRate: rate, 170 NumChannels: 1, 171 }) 172 173 if seconds < 0 { 174 return nil 175 } 176 177 last := int(seconds * rate) 178 dt := 1.0 / rate 179 for i := 0; i < last; i++ { 180 t := float64(i) * dt 181 _, err := emitInt16LE(w.Writer, math.Sin(frequency*tau*t)) 182 if err != nil { 183 return noMoreOutput{} 184 } 185 } 186 return nil 187 }