File: easybox.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath easybox.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "bytes" 37 "encoding/base64" 38 "encoding/binary" 39 "encoding/json" 40 "errors" 41 "fmt" 42 "io" 43 "math" 44 "os" 45 "regexp" 46 "sort" 47 "strconv" 48 "strings" 49 "unicode" 50 "unicode/utf16" 51 "unicode/utf8" 52 ) 53 54 const easyboxInfo = ` 55 easybox [tool...] [options...] [arguments...] 56 57 This is a busybox-style command-line app, with several simple (easy) tools 58 in it. Running this tool without a tool name shows this help message, along 59 with all tool names and aliases available. 
60 61 All (optional) leading options start with either single or double-dash: 62 63 -h, -help show this help message 64 -list show all tools available 65 ` 66 67 var mains = map[string]func(){ 68 `avoid`: avoidMain, 69 `bytedump`: bytedumpMain, 70 `catl`: catlMain, 71 `coma`: comaMain, 72 `datauri`: datauriMain, 73 `debase64`: debase64Main, 74 `dedup`: dedupMain, 75 `dejsonl`: dejsonlMain, 76 `dessv`: dessvMain, 77 `erase`: eraseMain, 78 `fixlines`: fixlinesMain, 79 `hima`: himaMain, 80 `json0`: json0Main, 81 `json2`: json2Main, 82 `jsonl`: jsonlMain, 83 `jsons`: jsonsMain, 84 `match`: matchMain, 85 `ncol`: ncolMain, 86 `njson`: njsonMain, 87 `nn`: nnMain, 88 `plain`: plainMain, 89 `primes`: primesMain, 90 `realign`: realignMain, 91 `squeeze`: squeezeMain, 92 `tcatl`: tcatlMain, 93 `utfate`: utfateMain, 94 } 95 96 var toolAliases = map[string]string{ 97 `deduplicate`: `dedup`, 98 `detrail`: `fixlines`, 99 `entab`: `dessv`, 100 `entabulate`: `dessv`, 101 `j0`: `json0`, 102 `j2`: `json2`, 103 `jl`: `jsonl`, 104 `jsonlines`: `jsonl`, 105 `ndjson`: `jsonl`, 106 `nicej`: `njson`, 107 `nicejson`: `njson`, 108 `nicenum`: `nn`, 109 `nicenums`: `nn`, 110 `nj`: `njson`, 111 `nicenumbers`: `nn`, 112 `unique`: `dedup`, 113 `utf8`: `utfate`, 114 } 115 116 // errNoMoreOutput is a dummy error whose message is ignored, and which 117 // causes the app to quit immediately and successfully 118 var errNoMoreOutput = errors.New(`no more output`) 119 120 const ( 121 gb = 1024 * 1024 * 1024 122 123 bufSize = 32 * 1024 124 maxLineBufSize = 8 * gb 125 126 stdinDisplayName = `<stdin>` 127 ) 128 129 func main() { 130 if len(os.Args) > 1 { 131 switch os.Args[1] { 132 case `-h`, `--h`, `-help`, `--help`: 133 os.Stdout.WriteString(easyboxInfo[1:]) 134 return 135 136 case `-list`, `--list`: 137 easyboxList(os.Stdout) 138 return 139 } 140 } 141 142 // skip past all folder names, if present 143 tool := os.Args[0] 144 if i := strings.LastIndexByte(tool, '/'); i >= 0 { 145 tool = tool[i+1:] 146 } 
147 148 // if not called from a link, make the tool the first cmd-line argument 149 switch tool { 150 case `easybox`, `eb`, `ebox`: 151 if len(os.Args) == 1 { 152 easyboxHelp(os.Stderr) 153 os.Exit(1) 154 } 155 156 os.Args = os.Args[1:] 157 158 // skip past all folder names, if present 159 tool = os.Args[0] 160 if i := strings.LastIndexByte(tool, '/'); i >= 0 { 161 tool = tool[i+1:] 162 } 163 } 164 165 // ignore all dashes and/or underscores in tool names 166 tool = strings.Replace(tool, `-`, ``, -1) 167 tool = strings.Replace(tool, `_`, ``, -1) 168 169 if tool == `help` { 170 easyboxHelp(os.Stdout) 171 return 172 } 173 174 // dealias tool name 175 if name, ok := toolAliases[tool]; ok { 176 tool = name 177 } 178 179 main, ok := mains[tool] 180 if !ok { 181 os.Stderr.WriteString(`easybox: tool named '`) 182 os.Stderr.WriteString(tool) 183 os.Stderr.WriteString("' not found\n") 184 os.Stderr.WriteString("hint: try using the 'help' tool\n") 185 os.Exit(1) 186 } 187 188 main() 189 } 190 191 func easyboxHelp(w io.Writer) { 192 n := len(mains) 193 if n < len(toolAliases) { 194 n = len(toolAliases) 195 } 196 197 sortedKeys := make([]string, 0, n) 198 for k := range mains { 199 sortedKeys = append(sortedKeys, k) 200 } 201 sort.Strings(sortedKeys) 202 203 io.WriteString(w, easyboxInfo[1:]) 204 205 io.WriteString(w, "\nTools Available\n\n") 206 for _, k := range sortedKeys { 207 io.WriteString(w, ` `) 208 io.WriteString(w, k) 209 io.WriteString(w, "\n") 210 } 211 212 n = 0 213 sortedKeys = sortedKeys[:0] 214 for k := range toolAliases { 215 if n < len(k) { 216 n = len(k) 217 } 218 sortedKeys = append(sortedKeys, k) 219 } 220 sort.Strings(sortedKeys) 221 222 io.WriteString(w, "\nAliases Available\n\n") 223 for _, k := range sortedKeys { 224 fmt.Fprintf(w, " %-*s -> %s\n", n, k, toolAliases[k]) 225 } 226 } 227 228 func easyboxList(w io.Writer) { 229 sortedKeys := make([]string, 0, len(mains)) 230 for k := range mains { 231 sortedKeys = append(sortedKeys, k) 232 } 233 
sort.Strings(sortedKeys) 234 235 for _, k := range sortedKeys { 236 io.WriteString(w, k) 237 io.WriteString(w, "\n") 238 } 239 } 240 241 type easyboxRunner func(bw *bufio.Writer, r io.Reader, name string) error 242 243 func easyboxRun(args []string, run easyboxRunner) { 244 dashes := 0 245 for _, name := range args { 246 if name == `-` { 247 dashes++ 248 } 249 250 // only need to tell whether more than 1 dash was given 251 if dashes > 1 { 252 break 253 } 254 } 255 256 bw := bufio.NewWriterSize(os.Stdout, bufSize) 257 defer bw.Flush() 258 259 if len(args) == 0 { 260 if err := run(bw, os.Stdin, stdinDisplayName); err != nil { 261 handleError(bw, err) 262 } 263 return 264 } 265 266 gotStdin := false 267 multipleDashes := dashes > 1 268 269 var stdin []byte 270 271 for _, name := range args { 272 // allow re-reading stdin more than once 273 if name == `-` { 274 var r io.Reader = os.Stdin 275 if multipleDashes && !gotStdin { 276 gotStdin = true 277 stdin, _ = io.ReadAll(r) 278 } 279 if gotStdin { 280 r = bytes.NewReader(stdin) 281 } 282 283 if err := run(bw, r, stdinDisplayName); err != nil { 284 handleError(bw, err) 285 return 286 } 287 continue 288 } 289 290 if err := easyboxHandleFile(bw, name, run); err != nil { 291 handleError(bw, err) 292 return 293 } 294 } 295 } 296 297 func easyboxHandleFile(w *bufio.Writer, name string, run easyboxRunner) error { 298 if name == `` || name == `-` { 299 return run(w, os.Stdin, stdinDisplayName) 300 } 301 302 f, err := os.Open(name) 303 if err != nil { 304 return errors.New(`can't read from file named "` + name + `"`) 305 } 306 defer f.Close() 307 308 return run(w, f, name) 309 } 310 311 // countDecimals counts decimal digits from the string given, assuming it 312 // represents a valid/useable float64, when parsed 313 func countDecimals(s string) int { 314 dot := strings.IndexByte(s, '.') 315 if dot < 0 { 316 return 0 317 } 318 319 decs := 0 320 s = s[dot+1:] 321 322 for len(s) > 0 { 323 s = skipLeadingEscapeSequences(s) 324 if 
// countWidth counts the runes in the string given, ignoring any ANSI-style
// sequences in it, which start with ESC[ and end with their first letter;
// a trailing unterminated sequence is ignored as a whole
func countWidth(s string) int {
	width := 0

	for len(s) > 0 {
		i := strings.Index(s, "\x1b[")
		if i < 0 {
			return width + utf8.RuneCountInString(s)
		}

		width += utf8.RuneCountInString(s[:i])

		// skip the text just counted, along with the 2-byte sequence intro:
		// not doing so would make the loop below stop at the first letter
		// of the plain-text, which would later be counted more than once
		s = s[i+2:]

		// skip the rest of the sequence, up to and including its letter
		for len(s) > 0 {
			upper := s[0] &^ 32
			s = s[1:]
			if 'A' <= upper && upper <= 'Z' {
				break
			}
		}
	}

	return width
}
// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; given their fairly-common
// use, even the hyperlink ESC]8 sequences are supported.
//
// The result is a pair of slice indices: the first is where the sequence
// starts (its ESC byte), the second is right after the sequence ends, which
// makes the pair useable directly in slice expressions. The start is -1 when
// no sequence is found; the end is -1 when a sequence starts, but never ends.
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		if prev == '\x1b' && b == '[' {
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			// j is the index of the 2nd byte in the ending pair, so only
			// 1 more byte is needed to go past the end of the sequence
			return i - 1, i + 1 + j + 1
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, or -1 when the
// slice has no letters
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns lowercase ASCII letters into uppercase
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y:
// the result is the index of y, the 2nd byte of the pair, or -1 when the
// pair isn't found
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y {
			return i
		}
		prev = b
	}

	return -1
}
// skip ignores n bytes from the reader given; non-positive counts do nothing
func skip(r io.Reader, n int) {
	if n < 1 {
		return
	}

	// use func Seek for input files, except for stdin, which you can't seek
	if f, ok := r.(*os.File); ok && f != os.Stdin {
		if _, err := f.Seek(int64(n), io.SeekCurrent); err == nil {
			return
		}
		// not all files can seek (pipes, for example): when seeking fails,
		// fall back to reading and discarding the bytes instead
	}

	io.CopyN(io.Discard, r, int64(n))
}
'\t', '\n', '\r': 612 b = b[1:] 613 default: 614 return b 615 } 616 } 617 618 // an empty slice is all that's left, at this point 619 return nil 620 } 621 622 func trimTrailingSpaces(s string) string { 623 for len(s) > 0 && s[len(s)-1] == ' ' { 624 s = s[:len(s)-1] 625 } 626 return s 627 } 628 629 // writeSpaces minimizes calls to write-like funcs 630 func writeSpaces(w *bufio.Writer, n int) { 631 const spaces = ` ` 632 if n < 1 { 633 return 634 } 635 636 for n >= len(spaces) { 637 w.WriteString(spaces) 638 n -= len(spaces) 639 } 640 w.WriteString(spaces[:n]) 641 } 642 643 func writeTabs(w *bufio.Writer, n int) { 644 for n > 0 { 645 w.WriteByte('\t') 646 n-- 647 } 648 } 649 650 const avoidInfo = ` 651 avoid [options...] [regular expressions...] 652 653 Avoid/ignore lines which match any of the extended-mode regular expressions 654 given. When not given any regex, all empty lines are ignored by default. 655 656 The options are, available both in single and double-dash versions 657 658 -h, -help show this help message 659 -i, -ins match regexes case-insensitively 660 ` 661 662 func avoidMain() { 663 nerr := 0 664 buffered := false 665 sensitive := true 666 args := os.Args[1:] 667 668 out: 669 for len(args) > 0 { 670 switch args[0] { 671 case `-b`, `--b`, `-buffered`, `--buffered`: 672 buffered = true 673 args = args[1:] 674 675 case `-i`, `--i`, `-ins`, `--ins`: 676 sensitive = false 677 args = args[1:] 678 679 case `-h`, `--h`, `-help`, `--help`: 680 os.Stdout.WriteString(avoidInfo[1:]) 681 return 682 683 default: 684 break out 685 } 686 } 687 688 if len(args) > 0 && args[0] == `--` { 689 args = args[1:] 690 } 691 692 liveLines := !buffered 693 if !buffered { 694 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 695 liveLines = false 696 } 697 } 698 699 if len(args) == 0 { 700 args = []string{`^$`} 701 } 702 703 exprs := make([]*regexp.Regexp, 0, len(args)) 704 705 for _, src := range args { 706 var err error 707 var exp *regexp.Regexp 708 if !sensitive { 
709 exp, err = regexp.Compile(`(?i)` + src) 710 } else { 711 exp, err = regexp.Compile(src) 712 } 713 714 if err != nil { 715 os.Stderr.WriteString(err.Error()) 716 os.Stderr.WriteString("\n") 717 nerr++ 718 } 719 720 exprs = append(exprs, exp) 721 } 722 723 if nerr > 0 { 724 os.Exit(1) 725 } 726 727 sc := bufio.NewScanner(os.Stdin) 728 sc.Buffer(nil, maxLineBufSize) 729 bw := bufio.NewWriterSize(os.Stdout, bufSize) 730 defer bw.Flush() 731 732 for i := 0; sc.Scan(); i++ { 733 s := sc.Bytes() 734 if i == 0 && hasPrefixBOM(s) { 735 s = s[3:] 736 } 737 738 if !match(s, exprs) { 739 bw.Write(s) 740 bw.WriteByte('\n') 741 742 if !liveLines { 743 continue 744 } 745 746 if err := bw.Flush(); err != nil { 747 return 748 } 749 } 750 } 751 } 752 753 const bytedumpInfo = ` 754 bytedump [options...] [filenames...] 755 756 Show bytes as hexadecimal and ascii on the side. 757 758 Each line shows the starting offset for the bytes shown, 16 of the bytes 759 themselves in base-16 notation, and any ASCII codes when the byte values 760 are in the typical ASCII range. The offsets shown are base-10. 
761 ` 762 763 const bytedumpChunkSize = 16 764 765 // bytedumpHexSymbols is a direct lookup table combining 2 hex digits with 766 // either a space or a displayable ASCII symbol matching the byte's own ASCII 767 // value; this table was autogenerated by running the command 768 // 769 // seq 0 255 | ./hex-symbols.awk 770 var bytedumpHexSymbols = [256]string{ 771 `00 `, `01 `, `02 `, `03 `, `04 `, `05 `, `06 `, `07 `, 772 `08 `, `09 `, `0a `, `0b `, `0c `, `0d `, `0e `, `0f `, 773 `10 `, `11 `, `12 `, `13 `, `14 `, `15 `, `16 `, `17 `, 774 `18 `, `19 `, `1a `, `1b `, `1c `, `1d `, `1e `, `1f `, 775 `20 `, `21!`, `22"`, `23#`, `24$`, `25%`, `26&`, `27'`, 776 `28(`, `29)`, `2a*`, `2b+`, `2c,`, `2d-`, `2e.`, `2f/`, 777 `300`, `311`, `322`, `333`, `344`, `355`, `366`, `377`, 778 `388`, `399`, `3a:`, `3b;`, `3c<`, `3d=`, `3e>`, `3f?`, 779 `40@`, `41A`, `42B`, `43C`, `44D`, `45E`, `46F`, `47G`, 780 `48H`, `49I`, `4aJ`, `4bK`, `4cL`, `4dM`, `4eN`, `4fO`, 781 `50P`, `51Q`, `52R`, `53S`, `54T`, `55U`, `56V`, `57W`, 782 `58X`, `59Y`, `5aZ`, `5b[`, `5c\`, `5d]`, `5e^`, `5f_`, 783 "60`", `61a`, `62b`, `63c`, `64d`, `65e`, `66f`, `67g`, 784 `68h`, `69i`, `6aj`, `6bk`, `6cl`, `6dm`, `6en`, `6fo`, 785 `70p`, `71q`, `72r`, `73s`, `74t`, `75u`, `76v`, `77w`, 786 `78x`, `79y`, `7az`, `7b{`, `7c|`, `7d}`, `7e~`, `7f `, 787 `80 `, `81 `, `82 `, `83 `, `84 `, `85 `, `86 `, `87 `, 788 `88 `, `89 `, `8a `, `8b `, `8c `, `8d `, `8e `, `8f `, 789 `90 `, `91 `, `92 `, `93 `, `94 `, `95 `, `96 `, `97 `, 790 `98 `, `99 `, `9a `, `9b `, `9c `, `9d `, `9e `, `9f `, 791 `a0 `, `a1 `, `a2 `, `a3 `, `a4 `, `a5 `, `a6 `, `a7 `, 792 `a8 `, `a9 `, `aa `, `ab `, `ac `, `ad `, `ae `, `af `, 793 `b0 `, `b1 `, `b2 `, `b3 `, `b4 `, `b5 `, `b6 `, `b7 `, 794 `b8 `, `b9 `, `ba `, `bb `, `bc `, `bd `, `be `, `bf `, 795 `c0 `, `c1 `, `c2 `, `c3 `, `c4 `, `c5 `, `c6 `, `c7 `, 796 `c8 `, `c9 `, `ca `, `cb `, `cc `, `cd `, `ce `, `cf `, 797 `d0 `, `d1 `, `d2 `, `d3 `, `d4 `, `d5 `, `d6 `, `d7 `, 798 `d8 `, `d9 `, 
`da `, `db `, `dc `, `dd `, `de `, `df `, 799 `e0 `, `e1 `, `e2 `, `e3 `, `e4 `, `e5 `, `e6 `, `e7 `, 800 `e8 `, `e9 `, `ea `, `eb `, `ec `, `ed `, `ee `, `ef `, 801 `f0 `, `f1 `, `f2 `, `f3 `, `f4 `, `f5 `, `f6 `, `f7 `, 802 `f8 `, `f9 `, `fa `, `fb `, `fc `, `fd `, `fe `, `ff `, 803 } 804 805 func bytedumpMain() { 806 args := os.Args[1:] 807 808 if len(args) > 0 { 809 switch args[0] { 810 case `-h`, `--h`, `-help`, `--help`: 811 os.Stdout.WriteString(bytedumpInfo[1:]) 812 return 813 } 814 } 815 816 if len(args) > 0 && args[0] == `--` { 817 args = args[1:] 818 } 819 820 handleError(nil, bytedumpRun(args)) 821 } 822 823 func bytedumpRun(args []string) error { 824 w := bufio.NewWriterSize(os.Stdout, 32*1024) 825 defer w.Flush() 826 827 // with no filenames given, handle stdin and quit 828 if len(args) == 0 { 829 return bytedump(w, os.Stdin, stdinDisplayName, -1) 830 } 831 832 for i, fname := range args { 833 if i > 0 { 834 w.WriteString("\n") 835 w.WriteString("\n") 836 } 837 838 if err := bytedumpHandleFile(w, fname); err != nil { 839 return err 840 } 841 } 842 843 return nil 844 } 845 846 func bytedumpHandleFile(w *bufio.Writer, fname string) error { 847 f, err := os.Open(fname) 848 if err != nil { 849 return err 850 } 851 defer f.Close() 852 853 stat, err := f.Stat() 854 if err != nil { 855 return bytedump(w, f, fname, -1) 856 } 857 858 fsize := int(stat.Size()) 859 return bytedump(w, f, fname, fsize) 860 } 861 862 // bytedump shows some messages related to the input and the cmd-line options 863 // used, and then follows them by the hexadecimal byte-view 864 func bytedump(w *bufio.Writer, r io.Reader, name string, size int) error { 865 owidth := -1 866 if size > 0 { 867 w := math.Log10(float64(size)) 868 w = math.Max(math.Ceil(w), 1) 869 owidth = int(w) 870 } 871 if owidth < 0 { 872 owidth = 8 873 } 874 875 rc := bytedumpConfig{ 876 out: w, 877 offsetWidth: owidth, 878 } 879 880 if size < 0 { 881 fmt.Fprintf(w, "• %s\n", name) 882 } else { 883 const fs = "• %s 
(%s bytes)\n" 884 fmt.Fprintf(w, fs, name, sprintCommas(size)) 885 } 886 w.WriteByte('\n') 887 888 // when done, emit a new line in case only part of the last line is 889 // shown, which means no newline was emitted for it 890 defer w.WriteString("\n") 891 892 // calling func Read directly can sometimes result in chunks shorter 893 // than the max chunk-size, even when there are plenty of bytes yet 894 // to read; to avoid that, use a buffered-reader to explicitly fill 895 // a slice instead 896 br := bufio.NewReader(r) 897 898 // to show ASCII up to 1 full chunk ahead, 2 chunks are needed 899 cur := make([]byte, 0, bytedumpChunkSize) 900 ahead := make([]byte, 0, bytedumpChunkSize) 901 902 // the ASCII-panel's wide output requires staying 1 step/chunk behind, 903 // so to speak 904 cur, err := bytedumpFillChunk(cur[:0], bytedumpChunkSize, br) 905 if len(cur) == 0 { 906 if err == io.EOF { 907 err = nil 908 } 909 return err 910 } 911 912 for { 913 ahead, err := bytedumpFillChunk(ahead[:0], bytedumpChunkSize, br) 914 if err != nil && err != io.EOF { 915 return err 916 } 917 918 if len(ahead) == 0 { 919 // done, maybe except for an extra line of output 920 break 921 } 922 923 // show the byte-chunk on its own output line 924 if err := bytedumpWriteChunk(rc, cur, ahead); err != nil { 925 return errNoMoreOutput 926 } 927 928 rc.chunks++ 929 rc.offset += uint(len(cur)) 930 cur = cur[:copy(cur, ahead)] 931 } 932 933 // don't forget the last output line 934 if rc.chunks > 0 && len(cur) > 0 { 935 return bytedumpWriteChunk(rc, cur, nil) 936 } 937 return nil 938 } 939 940 // bytedumpFillChunk tries to read the number of bytes given, appending them 941 // to the byte-slice given; this func returns an EOF error only when no bytes 942 // are read, which somewhat simplifies error-handling for the func caller 943 func bytedumpFillChunk(chunk []byte, n int, br *bufio.Reader) ([]byte, error) { 944 // read buffered-bytes up to the max chunk-size 945 for i := 0; i < n; i++ { 946 b, err 
// bytedumpConfig groups several arguments given to any of the rendering funcs
type bytedumpConfig struct {
	// out is the writer to send all output to
	out *bufio.Writer

	// offset is the byte-offset of the first byte shown on the current output
	// line: if shown at all, it's shown at the start of the line
	offset uint

	// chunks is the 0-based counter for byte-chunks/lines shown so far, which
	// indirectly keeps track of when it's time to show a `breather` line
	chunks uint

	// perLine is how many hex-encoded bytes are shown per line
	perLine uint

	// offsetWidth is the max string-width for the byte-offsets shown at the
	// start of output lines, and determines those values' left-padding
	offsetWidth int
}
// loopThousandsGroups calls the func given for each group of up to 3 decimal
// digits in the number given, going from the most-significant group to the
// least-significant one: the callback is given the 0-based index of the
// group, along with the group's value. All values are between 0 and 999,
// except for the first group of negative numbers, which is negative.
//
// Only integer arithmetic is used, since going through floating-point
// logarithms and powers gives wrong groups for numbers too big for float64
// values to represent exactly, and isn't reliable around powers of 1000.
func loopThousandsGroups(n int, fn func(i, n int)) {
	if n == 0 {
		fn(0, 0)
		return
	}

	// magnitudes are unsigned, so even the lowest int has a valid one
	sign := +1
	mag := uint64(n)
	if n < 0 {
		sign = -1
		mag = -mag
	}

	// find the biggest power of 1000 which doesn't exceed the magnitude
	base := uint64(1)
	for mag/base >= 1000 {
		base *= 1000
	}

	for i := 0; base > 0; i++ {
		fn(i, sign*int(mag/base))
		// if original number was negative, ensure only first
		// group gives a negative input to the callback
		sign = +1

		mag %= base
		base /= 1000
	}
}
1057 w := cfg.out 1058 1059 // start each line with the byte-offset for the 1st item shown on it 1060 writeCounter(w, cfg.offsetWidth, cfg.offset) 1061 w.WriteByte(' ') 1062 1063 for _, b := range first { 1064 // fmt.Fprintf(w, ` %02x`, b) 1065 // 1066 // the commented part above was a performance bottleneck, since 1067 // the slow/generic fmt.Fprintf was called for each input byte 1068 w.WriteByte(' ') 1069 bytedumpWriteHex(w, b) 1070 } 1071 1072 bytedumpWriteASCII(w, first, second, bytedumpChunkSize) 1073 return w.WriteByte('\n') 1074 } 1075 1076 // writeCounter just emits a left-padded number 1077 func writeCounter(w *bufio.Writer, width int, n uint) { 1078 var buf [32]byte 1079 str := strconv.AppendUint(buf[:0], uint64(n), 10) 1080 writeSpaces(w, width-len(str)) 1081 w.Write(str) 1082 } 1083 1084 // bytedumpWriteASCII emits the side-panel showing all ASCII runs for each line 1085 func bytedumpWriteASCII(w *bufio.Writer, first, second []byte, width int) { 1086 // prev keeps track of the previous byte, so spaces are added 1087 // when bytes change from non-visible-ASCII to visible-ASCII 1088 var prev byte 1089 1090 spaces := 3*(width-len(first)) + bytedumpPadding 1091 1092 for _, b := range first { 1093 if 32 < b && b < 127 { 1094 if !(32 < prev && prev < 127) { 1095 writeSpaces(w, spaces) 1096 spaces = 1 1097 } 1098 w.WriteByte(b) 1099 } 1100 prev = b 1101 } 1102 1103 for _, b := range second { 1104 if 32 < b && b < 127 { 1105 if !(32 < prev && prev < 127) { 1106 writeSpaces(w, spaces) 1107 spaces = 1 1108 } 1109 w.WriteByte(b) 1110 } 1111 prev = b 1112 } 1113 } 1114 1115 const catlInfo = ` 1116 catl [options...] [file...] 1117 1118 1119 Unlike "cat", conCATenate Lines ensures lines across inputs are never joined 1120 by accident, when an input's last line doesn't end with a line-feed. 1121 1122 Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line 1123 feeds. Leading BOM (byte-order marks) on first lines are also ignored. 
1124 1125 All (optional) leading options start with either single or double-dash: 1126 1127 -h show this help message 1128 -help show this help message 1129 ` 1130 1131 func catlMain() { 1132 buffered := false 1133 args := os.Args[1:] 1134 1135 if len(args) > 0 { 1136 switch args[0] { 1137 case `-b`, `--b`, `-buffered`, `--buffered`: 1138 buffered = true 1139 args = args[1:] 1140 1141 case `-h`, `--h`, `-help`, `--help`: 1142 os.Stdout.WriteString(catlInfo[1:]) 1143 return 1144 } 1145 } 1146 1147 if len(args) > 0 && args[0] == `--` { 1148 args = args[1:] 1149 } 1150 1151 liveLines := !buffered 1152 if !buffered { 1153 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 1154 liveLines = false 1155 } 1156 } 1157 1158 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 1159 return catl(w, r, liveLines) 1160 }) 1161 } 1162 1163 func catl(w *bufio.Writer, r io.Reader, live bool) error { 1164 sc := bufio.NewScanner(r) 1165 sc.Buffer(nil, maxLineBufSize) 1166 1167 for i := 0; sc.Scan(); i++ { 1168 s := sc.Bytes() 1169 if i == 0 && hasPrefixBOM(s) { 1170 s = s[3:] 1171 } 1172 1173 w.Write(s) 1174 if w.WriteByte('\n') != nil { 1175 return errNoMoreOutput 1176 } 1177 1178 if !live { 1179 continue 1180 } 1181 1182 if err := w.Flush(); err != nil { 1183 return errNoMoreOutput 1184 } 1185 } 1186 1187 return sc.Err() 1188 } 1189 1190 const comaInfo = ` 1191 coma [options...] [regexes/style pairs...] 1192 1193 1194 COlor MAtches ANSI-styles matching regular expressions along lines read 1195 from the standard input. The regular-expression mode used is "re2", which 1196 is a superset of the commonly-used "extended-mode". 1197 1198 Regexes always avoid matching any ANSI-style sequences, to avoid messing 1199 those up. Also, multiple matches in a line never overlap: at each step 1200 along a line, the earliest-starting match among the regexes always wins, 1201 as the order regexes are given among the arguments never matters. 
// styleAliases maps alternative style names to the canonical names used as
// keys in the styles table: comaMain looks a user-given style name up here
// first, and only then looks the (possibly replaced) name up in styles, so
// every value here is meant to be a key of that table
var styleAliases = map[string]string{
	// single-letter shortcuts
	`b`: `blue`,
	`g`: `green`,
	`m`: `magenta`,
	`o`: `orange`,
	`p`: `purple`,
	`r`: `red`,
	`u`: `underline`,

	// colored backgrounds, with a leading `b`: note `bg` means green-back
	`bb`: `blueback`,
	`bg`: `greenback`,
	`bm`: `magentaback`,
	`bo`: `orangeback`,
	`bp`: `purpleback`,
	`br`: `redback`,

	// colored backgrounds, with a trailing `b`
	`gb`: `greenback`,
	`mb`: `magentaback`,
	`ob`: `orangeback`,
	`pb`: `purpleback`,
	`rb`: `redback`,

	`hi`:  `inverse`,
	`inv`: `inverse`,
	`mag`: `magenta`,

	`du`: `doubleunderline`,

	`flip`: `inverse`,
	`swap`: `inverse`,

	// longer synonyms
	`reset`:     `plain`,
	`highlight`: `inverse`,
	`hilite`:    `inverse`,
	`invert`:    `inverse`,
	`inverted`:  `inverse`,
	`swapped`:   `inverse`,

	`dunderline`:  `doubleunderline`,
	`dunderlined`: `doubleunderline`,

	`strikethrough`: `strike`,
	`strikethru`:    `strike`,
	`struck`:        `strike`,

	`underlined`: `underline`,

	// backgrounds spelled as b + color name
	`bblue`:    `blueback`,
	`bgray`:    `grayback`,
	`bgreen`:   `greenback`,
	`bmagenta`: `magentaback`,
	`borange`:  `orangeback`,
	`bpurple`:  `purpleback`,
	`bred`:     `redback`,

	// backgrounds spelled as bg + color name
	`bgblue`:    `blueback`,
	`bggray`:    `grayback`,
	`bggreen`:   `greenback`,
	`bgmag`:     `magentaback`,
	`bgmagenta`: `magentaback`,
	`bgorange`:  `orangeback`,
	`bgpurple`:  `purpleback`,
	`bgred`:     `redback`,

	// backgrounds spelled as color name + bg
	`bluebg`:    `blueback`,
	`graybg`:    `grayback`,
	`greenbg`:   `greenback`,
	`magbg`:     `magentaback`,
	`magentabg`: `magentaback`,
	`orangebg`:  `orangeback`,
	`purplebg`:  `purpleback`,
	`redbg`:     `redback`,

	// backgrounds spelled as back + color name
	`backblue`:    `blueback`,
	`backgray`:    `grayback`,
	`backgreen`:   `greenback`,
	`backmag`:     `magentaback`,
	`backmagenta`: `magentaback`,
	`backorange`:  `orangeback`,
	`backpurple`:  `purpleback`,
	`backred`:     `redback`,
}
make([]patternStylePair, 0, len(args)/2) 1363 1364 for len(args) >= 2 { 1365 src := args[0] 1366 sname := args[1] 1367 1368 var err error 1369 var exp *regexp.Regexp 1370 if insensitive { 1371 exp, err = regexp.Compile(`(?i)` + src) 1372 } else { 1373 exp, err = regexp.Compile(src) 1374 } 1375 if err != nil { 1376 os.Stderr.WriteString(err.Error()) 1377 os.Stderr.WriteString("\n") 1378 nerr++ 1379 } 1380 1381 if alias, ok := styleAliases[sname]; ok { 1382 sname = alias 1383 } 1384 1385 style, ok := styles[sname] 1386 if !ok { 1387 os.Stderr.WriteString("no style named `") 1388 os.Stderr.WriteString(args[1]) 1389 os.Stderr.WriteString("`\n") 1390 nerr++ 1391 } 1392 1393 pairs = append(pairs, patternStylePair{expr: exp, style: style}) 1394 args = args[2:] 1395 } 1396 1397 if nerr > 0 { 1398 os.Exit(1) 1399 } 1400 1401 liveLines := !buffered 1402 if !buffered { 1403 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 1404 liveLines = false 1405 } 1406 } 1407 1408 sc := bufio.NewScanner(os.Stdin) 1409 sc.Buffer(nil, maxLineBufSize) 1410 bw := bufio.NewWriterSize(os.Stdout, bufSize) 1411 defer bw.Flush() 1412 1413 for i := 0; sc.Scan(); i++ { 1414 s := sc.Bytes() 1415 if i == 0 && hasPrefixBOM(s) { 1416 s = s[3:] 1417 } 1418 1419 comaHandleLine(bw, s, pairs) 1420 if err := bw.WriteByte('\n'); err != nil { 1421 return 1422 } 1423 1424 if !liveLines { 1425 continue 1426 } 1427 1428 if err := bw.Flush(); err != nil { 1429 return 1430 } 1431 } 1432 } 1433 1434 func comaHandleLine(w *bufio.Writer, s []byte, with []patternStylePair) { 1435 for len(s) > 0 { 1436 i, j := indexEscapeSequence(s) 1437 if i < 0 { 1438 comaHandleLineChunk(w, s, with) 1439 return 1440 } 1441 1442 comaHandleLineChunk(w, s[:i], with) 1443 w.Write(s[i:j]) 1444 1445 if j < 0 { 1446 break 1447 } 1448 s = s[j:] 1449 } 1450 } 1451 1452 func comaHandleLineChunk(w *bufio.Writer, s []byte, with []patternStylePair) { 1453 start := -1 1454 end := -1 1455 which := -1 1456 1457 for len(s) > 0 { 1458 start 
= -1 1459 for i, pair := range with { 1460 span := pair.expr.FindIndex(s) 1461 // also ignore empty regex matches to avoid infinite outer loops, 1462 // as skipping empty slices isn't advancing at all, leaving the 1463 // string stuck to being empty-matched forever by the same regex 1464 if span == nil || span[0] == span[1] { 1465 continue 1466 } 1467 1468 if span[0] < start || start < 0 { 1469 start = span[0] 1470 end = span[1] 1471 which = i 1472 } 1473 } 1474 1475 if start < 0 { 1476 w.Write(s) 1477 return 1478 } 1479 1480 w.Write(s[:start]) 1481 w.WriteString(with[which].style) 1482 w.Write(s[start:end]) 1483 w.WriteString("\x1b[0m") 1484 s = s[end:] 1485 } 1486 } 1487 1488 const datauriInfo = ` 1489 datauri [options...] [filenames...] 1490 1491 1492 Encode bytes as data-URIs, auto-detecting the file/data type using the first 1493 few bytes from each data/file stream. When given multiple inputs, the output 1494 will be multiple lines, one for each file given. 1495 1496 Empty files/inputs result in empty lines. A simple dash (-) stands for the 1497 standard-input, which is also used automatically when not given any files. 1498 1499 Data-URIs are base64-encoded text representations of arbitrary data, which 1500 include their payload's MIME-type, and which are directly useable/shareable 1501 in web-browsers as links, despite not looking like normal links/URIs. 1502 1503 Some web-browsers limit the size of handled data-URIs to tens of kilobytes. 
1504 1505 1506 Options 1507 1508 -h, -help, --h, --help show this help message 1509 ` 1510 1511 func datauriMain() { 1512 args := os.Args[1:] 1513 1514 if len(args) > 0 { 1515 switch args[0] { 1516 case `-h`, `--h`, `-help`, `--help`: 1517 os.Stdout.WriteString(datauriInfo[1:]) 1518 return 1519 } 1520 } 1521 1522 if len(args) > 0 && args[0] == `--` { 1523 args = args[1:] 1524 } 1525 1526 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 1527 return dataURI(w, r, name) 1528 }) 1529 } 1530 1531 func dataURI(w *bufio.Writer, r io.Reader, name string) error { 1532 var buf [64]byte 1533 n, err := r.Read(buf[:]) 1534 if err != nil && err != io.EOF { 1535 return err 1536 } 1537 start := buf[:n] 1538 1539 // handle regular data, trying to auto-detect its MIME type using 1540 // its first few bytes 1541 mime, ok := detectMIME(start) 1542 if !ok { 1543 return errors.New(name + `: unknown file type`) 1544 } 1545 1546 w.WriteString(`data:`) 1547 w.WriteString(mime) 1548 w.WriteString(`;base64,`) 1549 r = io.MultiReader(bytes.NewReader(start), r) 1550 enc := base64.NewEncoder(base64.StdEncoding, w) 1551 if _, err := io.Copy(enc, r); err != nil { 1552 return err 1553 } 1554 enc.Close() 1555 1556 w.WriteByte('\n') 1557 if err := w.Flush(); err != nil { 1558 return errNoMoreOutput 1559 } 1560 return nil 1561 } 1562 1563 // makeDotless is similar to filepath.Ext, except its results never start 1564 // with a dot 1565 func makeDotless(s string) string { 1566 i := strings.LastIndexByte(s, '.') 1567 if i >= 0 { 1568 return s[(i + 1):] 1569 } 1570 return s 1571 } 1572 1573 // nameToMIME tries to match a MIME type to a filename, dotted file extension, 1574 // or a dot-less filetype/extension given 1575 func nameToMIME(fname string) (mimeType string, ok bool) { 1576 // handle dotless file types and filenames alike 1577 kind, ok := type2mime[makeDotless(fname)] 1578 return kind, ok 1579 } 1580 1581 // detectMIME guesses the first appropriate MIME type from the first 
few 1582 // data bytes given: 24 bytes are enough to detect all supported types 1583 func detectMIME(b []byte) (mimeType string, ok bool) { 1584 t, ok := detectType(b) 1585 if ok { 1586 return t, true 1587 } 1588 return ``, false 1589 } 1590 1591 // detectType guesses the first appropriate file type for the data given: 1592 // here the type is a a filename extension without the leading dot 1593 func detectType(b []byte) (dotlessExt string, ok bool) { 1594 // empty data, so there's no way to detect anything 1595 if len(b) == 0 { 1596 return ``, false 1597 } 1598 1599 // check for plain-text web-document formats case-insensitively 1600 kind, ok := checkDoc(b) 1601 if ok { 1602 return kind, true 1603 } 1604 1605 // check data formats which allow any byte at the start 1606 kind, ok = checkSpecial(b) 1607 if ok { 1608 return kind, true 1609 } 1610 1611 // check all other supported data formats 1612 headers := hdrDispatch[b[0]] 1613 for _, t := range headers { 1614 if hasPrefixPattern(b[1:], t.Header[1:], cba) { 1615 return t.Type, true 1616 } 1617 } 1618 1619 // unrecognized data format 1620 return ``, false 1621 } 1622 1623 // checkDoc tries to guess if the bytes given are the start of HTML, SVG, 1624 // XML, or JSON data 1625 func checkDoc(b []byte) (kind string, ok bool) { 1626 // ignore leading whitespaces 1627 b = trimLeadingWhitespace(b) 1628 1629 // can't detect anything with empty data 1630 if len(b) == 0 { 1631 return ``, false 1632 } 1633 1634 // handle XHTML documents which don't start with a doctype declaration 1635 if bytes.Contains(b, doctypeHTML) { 1636 return html, true 1637 } 1638 1639 // handle HTML/SVG/XML documents 1640 if hasPrefixByte(b, '<') { 1641 if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) { 1642 if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) { 1643 return svg, true 1644 } 1645 return xml, true 1646 } 1647 1648 headers := hdrDispatch['<'] 1649 for _, v := range headers { 1650 if hasPrefixFold(b, v.Header) { 1651 return v.Type, true 
// hasPrefixPattern checks whether the data start with the pattern given,
// where any pattern byte equal to the wildcard value matches any data byte
// on the same spot; data shorter than the pattern never match, while empty
// patterns always do
func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool {
	if len(pat) > len(what) {
		return false
	}

	// only pattern bytes decide which spots are free: wildcard-valued data
	// bytes get no special treatment
	for i, want := range pat {
		if want == wildcard {
			continue
		}
		if what[i] != want {
			return false
		}
	}
	return true
}
// formatDescriptor ties a file-header pattern to its data-format type
type formatDescriptor struct {
	// Header has the leading bytes to match, where bytes valued cba stand
	// for any byte
	Header []byte

	// Type is the result for data matching the header
	Type string
}

// cba (can be anything) is the wildcard value for header patterns: ensure
// this value differs from all other literal bytes in the header tables, as
// failing that, its value could cause subtle type-misdetection bugs. The
// value 0xFD can't be used, since it's the last byte of the zstd header
// (0x28, 0xB5, 0x2F, 0xFD), which would then match any byte on that spot.
const cba = 0xFE // 254, which no header table uses as a literal byte

// dash-streamed m4a format
var m4aDash = []byte{
	cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
	000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
}
'h', 'e', 'i', 'c'}, heic}, 1859 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 1860 {m4aDash, m4a}, 1861 } 1862 1863 // sqlite3 database format 1864 var sqlite3db = []byte{ 1865 'S', 'Q', 'L', 'i', 't', 'e', ' ', 1866 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 1867 000, 1868 } 1869 1870 // windows-variant bitmap file-header, which is followed by a byte-counter for 1871 // the 40-byte infoheader which follows that 1872 var winbmp = []byte{ 1873 'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40, 1874 } 1875 1876 // deja-vu document format 1877 var djv = []byte{ 1878 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V', 1879 } 1880 1881 var doctypeHTML = []byte{ 1882 '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', 1883 } 1884 1885 // hdrDispatch groups format-description-groups by their first byte, thus 1886 // shortening total lookups for some data header: notice how the `ftyp` data 1887 // formats aren't handled here, since these can start with any byte, instead 1888 // of the literal value of the any-byte markers they use 1889 var hdrDispatch = [256][]formatDescriptor{ 1890 { 1891 {[]byte{000, 000, 001, 0xBA}, mpg}, 1892 {[]byte{000, 000, 001, 0xB3}, mpg}, 1893 {[]byte{000, 000, 001, 000}, ico}, 1894 {[]byte{000, 000, 002, 000}, cur}, 1895 {[]byte{000, 'a', 's', 'm'}, wasm}, 1896 }, // 0 1897 nil, // 1 1898 nil, // 2 1899 nil, // 3 1900 nil, // 4 1901 nil, // 5 1902 nil, // 6 1903 nil, // 7 1904 nil, // 8 1905 nil, // 9 1906 nil, // 10 1907 nil, // 11 1908 nil, // 12 1909 nil, // 13 1910 nil, // 14 1911 nil, // 15 1912 nil, // 16 1913 nil, // 17 1914 nil, // 18 1915 nil, // 19 1916 nil, // 20 1917 nil, // 21 1918 nil, // 22 1919 nil, // 23 1920 nil, // 24 1921 nil, // 25 1922 { 1923 {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm}, 1924 }, // 26 1925 nil, // 27 1926 nil, // 28 1927 nil, // 29 1928 nil, // 30 1929 { 1930 // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz}, 1931 {[]byte{0x1F, 
0x8B, 0x08}, gz}, 1932 }, // 31 1933 nil, // 32 1934 nil, // 33 ! 1935 nil, // 34 " 1936 { 1937 {[]byte{'#', '!', ' '}, text}, 1938 {[]byte{'#', '!', '/'}, text}, 1939 }, // 35 # 1940 nil, // 36 $ 1941 { 1942 {[]byte{'%', 'P', 'D', 'F'}, pdf}, 1943 {[]byte{'%', '!', 'P', 'S'}, ps}, 1944 }, // 37 % 1945 nil, // 38 & 1946 nil, // 39 ' 1947 { 1948 {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst}, 1949 }, // 40 ( 1950 nil, // 41 ) 1951 nil, // 42 * 1952 nil, // 43 + 1953 nil, // 44 , 1954 nil, // 45 - 1955 { 1956 {[]byte{'.', 's', 'n', 'd'}, au}, 1957 }, // 46 . 1958 nil, // 47 / 1959 nil, // 48 0 1960 nil, // 49 1 1961 nil, // 50 2 1962 nil, // 51 3 1963 nil, // 52 4 1964 nil, // 53 5 1965 nil, // 54 6 1966 nil, // 55 7 1967 { 1968 {[]byte{'8', 'B', 'P', 'S'}, psd}, 1969 }, // 56 8 1970 nil, // 57 9 1971 nil, // 58 : 1972 nil, // 59 ; 1973 { 1974 // func checkDoc is better for these, since it's case-insensitive 1975 {doctypeHTML, html}, 1976 {[]byte{'<', 's', 'v', 'g'}, svg}, 1977 {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html}, 1978 {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html}, 1979 {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html}, 1980 {[]byte{'<', '?', 'x', 'm', 'l'}, xml}, 1981 }, // 60 < 1982 nil, // 61 = 1983 nil, // 62 > 1984 nil, // 63 ? 
1985 nil, // 64 @ 1986 { 1987 {djv, djvu}, 1988 }, // 65 A 1989 { 1990 {winbmp, bmp}, 1991 }, // 66 B 1992 nil, // 67 C 1993 nil, // 68 D 1994 nil, // 69 E 1995 { 1996 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 1997 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 1998 }, // 70 F 1999 { 2000 {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif}, 2001 {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif}, 2002 }, // 71 G 2003 nil, // 72 H 2004 { 2005 {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata 2006 {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata 2007 {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata 2008 {[]byte{'I', 'I', '*', 000}, tiff}, 2009 }, // 73 I 2010 nil, // 74 J 2011 nil, // 75 K 2012 nil, // 76 L 2013 { 2014 {[]byte{'M', 'M', 000, '*'}, tiff}, 2015 {[]byte{'M', 'T', 'h', 'd'}, mid}, 2016 {[]byte{'M', 'Z', cba, 000, cba, 000}, exe}, 2017 // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe}, 2018 // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe}, 2019 // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe}, 2020 }, // 77 M 2021 nil, // 78 N 2022 { 2023 {[]byte{'O', 'g', 'g', 'S'}, ogg}, 2024 }, // 79 O 2025 { 2026 {[]byte{'P', 'K', 003, 004}, zip}, 2027 }, // 80 P 2028 nil, // 81 Q 2029 { 2030 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 2031 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 2032 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 2033 }, // 82 R 2034 { 2035 {sqlite3db, sqlite3}, 2036 }, // 83 S 2037 nil, // 84 T 2038 nil, // 85 U 2039 nil, // 86 V 2040 nil, // 87 W 2041 nil, // 88 X 2042 nil, // 89 Y 2043 nil, // 90 Z 2044 nil, // 91 [ 2045 nil, // 92 \ 2046 nil, // 93 ] 2047 nil, // 94 ^ 2048 nil, // 95 _ 2049 nil, // 96 ` 2050 nil, // 97 a 2051 nil, // 98 b 2052 { 2053 {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 2054 }, // 99 c 2055 nil, // 100 d 2056 nil, // 101 e 2057 { 2058 {[]byte{'f', 'L', 'a', 
'C'}, flac}, 2059 }, // 102 f 2060 nil, // 103 g 2061 nil, // 104 h 2062 nil, // 105 i 2063 nil, // 106 j 2064 nil, // 107 k 2065 nil, // 108 l 2066 nil, // 109 m 2067 nil, // 110 n 2068 nil, // 111 o 2069 nil, // 112 p 2070 nil, // 113 q 2071 nil, // 114 r 2072 nil, // 115 s 2073 nil, // 116 t 2074 nil, // 117 u 2075 nil, // 118 v 2076 nil, // 119 w 2077 nil, // 120 x 2078 nil, // 121 y 2079 nil, // 122 z 2080 { 2081 {[]byte{'{', '\\', 'r', 't', 'f'}, rtf}, 2082 }, // 123 { 2083 nil, // 124 | 2084 nil, // 125 } 2085 nil, // 126 2086 { 2087 {[]byte{127, 'E', 'L', 'F'}, elf}, 2088 }, // 127 2089 nil, // 128 2090 nil, // 129 2091 nil, // 130 2092 nil, // 131 2093 nil, // 132 2094 nil, // 133 2095 nil, // 134 2096 nil, // 135 2097 nil, // 136 2098 { 2099 {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 2100 }, // 137 2101 nil, // 138 2102 nil, // 139 2103 nil, // 140 2104 nil, // 141 2105 nil, // 142 2106 nil, // 143 2107 nil, // 144 2108 nil, // 145 2109 nil, // 146 2110 nil, // 147 2111 nil, // 148 2112 nil, // 149 2113 nil, // 150 2114 nil, // 151 2115 nil, // 152 2116 nil, // 153 2117 nil, // 154 2118 nil, // 155 2119 nil, // 156 2120 nil, // 157 2121 nil, // 158 2122 nil, // 159 2123 nil, // 160 2124 nil, // 161 2125 nil, // 162 2126 nil, // 163 2127 nil, // 164 2128 nil, // 165 2129 nil, // 166 2130 nil, // 167 2131 nil, // 168 2132 nil, // 169 2133 nil, // 170 2134 nil, // 171 2135 nil, // 172 2136 nil, // 173 2137 nil, // 174 2138 nil, // 175 2139 nil, // 176 2140 nil, // 177 2141 nil, // 178 2142 nil, // 179 2143 nil, // 180 2144 nil, // 181 2145 nil, // 182 2146 nil, // 183 2147 nil, // 184 2148 nil, // 185 2149 nil, // 186 2150 nil, // 187 2151 nil, // 188 2152 nil, // 189 2153 nil, // 190 2154 nil, // 191 2155 nil, // 192 2156 nil, // 193 2157 nil, // 194 2158 nil, // 195 2159 nil, // 196 2160 nil, // 197 2161 nil, // 198 2162 nil, // 199 2163 nil, // 200 2164 nil, // 201 2165 nil, // 202 2166 nil, // 203 2167 nil, // 204 2168 nil, // 205 2169 
nil, // 206 2170 nil, // 207 2171 nil, // 208 2172 nil, // 209 2173 nil, // 210 2174 nil, // 211 2175 nil, // 212 2176 nil, // 213 2177 nil, // 214 2178 nil, // 215 2179 nil, // 216 2180 nil, // 217 2181 nil, // 218 2182 nil, // 219 2183 nil, // 220 2184 nil, // 221 2185 nil, // 222 2186 nil, // 223 2187 nil, // 224 2188 nil, // 225 2189 nil, // 226 2190 nil, // 227 2191 nil, // 228 2192 nil, // 229 2193 nil, // 230 2194 nil, // 231 2195 nil, // 232 2196 nil, // 233 2197 nil, // 234 2198 nil, // 235 2199 nil, // 236 2200 nil, // 237 2201 nil, // 238 2202 nil, // 239 2203 nil, // 240 2204 nil, // 241 2205 nil, // 242 2206 nil, // 243 2207 nil, // 244 2208 nil, // 245 2209 nil, // 246 2210 nil, // 247 2211 nil, // 248 2212 nil, // 249 2213 nil, // 250 2214 nil, // 251 2215 nil, // 252 2216 nil, // 253 2217 nil, // 254 2218 { 2219 {[]byte{0xFF, 0xD8, 0xFF}, jpg}, 2220 {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 2221 {[]byte{0xFF, 0xFB}, mp3}, 2222 }, // 255 2223 } 2224 2225 const debase64Info = ` 2226 debase64 [file/data-URI...] 2227 2228 Decode base64-encoded files and/or data-URIs. 
// skipIntroDataURI figures out how many leading bytes to skip to get to the
// base64 payload in the chunk given: this includes a leading UTF-8 BOM, and
// the whole data-URI intro, up to and including its `;base64,` marker, when
// the data (after any BOM) start with `data:`.
//
// The marker is only looked for among the first 64 bytes after any BOM: not
// finding it there in what starts like a data-URI is an error, in which case
// the count only covers the BOM, if present.
func skipIntroDataURI(chunk []byte) (skip int, err error) {
	const (
		bom    = "\xef\xbb\xbf"
		scheme = `data:`
		marker = `;base64,`
		window = 64
	)

	rest := chunk
	if bytes.HasPrefix(rest, []byte(bom)) {
		rest = rest[len(bom):]
	}
	offset := len(chunk) - len(rest)

	// plain base64 payloads have no intro to skip
	if !bytes.HasPrefix(rest, []byte(scheme)) {
		return offset, nil
	}

	if len(rest) > window {
		rest = rest[:window]
	}

	at := bytes.Index(rest, []byte(marker))
	if at < 0 {
		return offset, errors.New(`invalid data URI`)
	}
	return offset + at + len(marker), nil
}
*bufio.Writer, name string, got stringSet, live bool) error { 2408 if name == `` || name == `-` { 2409 return dedup(w, os.Stdin, got, live) 2410 } 2411 2412 f, err := os.Open(name) 2413 if err != nil { 2414 return errors.New(`can't read from file named "` + name + `"`) 2415 } 2416 defer f.Close() 2417 2418 return dedup(w, f, got, live) 2419 } 2420 2421 func dedup(w *bufio.Writer, r io.Reader, got stringSet, live bool) error { 2422 sc := bufio.NewScanner(r) 2423 sc.Buffer(nil, maxLineBufSize) 2424 2425 for sc.Scan() { 2426 line := sc.Text() 2427 if _, ok := got[line]; ok { 2428 continue 2429 } 2430 got[line] = struct{}{} 2431 2432 w.Write(sc.Bytes()) 2433 if w.WriteByte('\n') != nil { 2434 return errNoMoreOutput 2435 } 2436 2437 if !live { 2438 continue 2439 } 2440 2441 if err := w.Flush(); err != nil { 2442 return errNoMoreOutput 2443 } 2444 } 2445 2446 return sc.Err() 2447 } 2448 2449 const dejsonlInfo = ` 2450 dejsonl [filepath...] 2451 2452 Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is 2453 simply plain-text lines, where each line is valid JSON on its own. 
2454 ` 2455 2456 const dejsonlIndent = ` ` 2457 2458 func dejsonlMain() { 2459 buffered := false 2460 args := os.Args[1:] 2461 2462 if len(args) > 0 { 2463 switch args[0] { 2464 case `-b`, `--b`, `-buffered`, `--buffered`: 2465 buffered = true 2466 args = args[1:] 2467 2468 case `-h`, `--h`, `-help`, `--help`: 2469 os.Stdout.WriteString(dejsonlInfo[1:]) 2470 return 2471 } 2472 } 2473 2474 if len(args) > 0 && args[0] == `--` { 2475 args = args[1:] 2476 } 2477 2478 liveLines := !buffered 2479 if !buffered { 2480 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 2481 liveLines = false 2482 } 2483 } 2484 2485 got := 0 2486 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 2487 return dejsonl(w, r, liveLines, &got) 2488 }) 2489 2490 if got == 0 { 2491 os.Stdout.WriteString("[\n]\n") 2492 } else { 2493 os.Stdout.WriteString("\n]\n") 2494 } 2495 } 2496 2497 func dejsonl(w *bufio.Writer, r io.Reader, live bool, got *int) error { 2498 sc := bufio.NewScanner(r) 2499 sc.Buffer(nil, maxLineBufSize) 2500 2501 for i := 0; sc.Scan(); i++ { 2502 s := sc.Text() 2503 if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { 2504 s = s[3:] 2505 } 2506 2507 // trim spaces at both ends of the current line 2508 for len(s) > 0 && s[0] == ' ' { 2509 s = s[1:] 2510 } 2511 for len(s) > 0 && s[len(s)-1] == ' ' { 2512 s = s[:len(s)-1] 2513 } 2514 2515 // ignore empty(ish) lines 2516 if len(s) == 0 { 2517 continue 2518 } 2519 2520 // ignore lines starting with unix-style comments 2521 if len(s) > 0 && s[0] == '#' { 2522 continue 2523 } 2524 2525 if err := checkJSONL(strings.NewReader(s)); err != nil { 2526 return err 2527 } 2528 2529 if *got == 0 { 2530 w.WriteByte('[') 2531 } else { 2532 w.WriteByte(',') 2533 } 2534 if w.WriteByte('\n') != nil { 2535 return errNoMoreOutput 2536 } 2537 w.WriteString(dejsonlIndent) 2538 w.WriteString(s) 2539 *got++ 2540 2541 if !live { 2542 continue 2543 } 2544 2545 if err := w.Flush(); err != nil { 2546 return errNoMoreOutput 2547 } 
// checkJSONL ensures the reader given has exactly 1 valid JSON value in it,
// the result being nil only in that case: empty(ish) inputs, invalid JSON
// syntax, trailing JSON values, and failures to read all result in errors.
func checkJSONL(r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		return errors.New(`input has no JSON values`)
	}
	if err != nil {
		// don't rely on a later read failing the same way: a reader can
		// fail once, then report the input is over
		return err
	}

	if err := checkToken(dec, t); err != nil {
		return err
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// checkToken validates the JSON value starting with the token given, reading
// all later tokens from the decoder when the value is an array or an object
func checkToken(dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return checkArray(dec)
		case json.Delim('{'):
			return checkObject(dec)
		default:
			return errors.New(`unsupported JSON syntax ` + string(t))
		}

	case nil, bool, float64, json.Number, string:
		return nil

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// checkArray validates the rest of an array whose opening bracket has just
// been read from the decoder, up to and including its closing bracket
func checkArray(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}

// checkObject validates the rest of an object whose opening brace has just
// been read from the decoder, up to and including its closing brace
func checkObject(dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			return nil
		}

		if _, ok := t.(string); !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		t, err = dec.Token()
		if err == io.EOF || t == json.Delim('}') {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// a failed read has no token to check
			return err
		}

		if err := checkToken(dec, t); err != nil {
			return err
		}
	}
}
dessvHandleRowSSV(w *bufio.Writer, s []byte, n int) int { 2729 for len(s) > 0 && s[0] == ' ' { 2730 s = s[1:] 2731 } 2732 for len(s) > 0 && s[len(s)-1] == ' ' { 2733 s = s[:len(s)-1] 2734 } 2735 2736 got := 0 2737 2738 for got = 0; len(s) > 0; got++ { 2739 if got > 0 { 2740 w.WriteByte('\t') 2741 } 2742 2743 i := bytes.IndexByte(s, ' ') 2744 if i < 0 { 2745 w.Write(s) 2746 s = nil 2747 n-- 2748 break 2749 } 2750 2751 w.Write(s[:i]) 2752 s = s[i+1:] 2753 for len(s) > 0 && s[0] == ' ' { 2754 s = s[1:] 2755 } 2756 n-- 2757 } 2758 2759 w.Write(s) 2760 writeTabs(w, n) 2761 return got 2762 } 2763 2764 func dessvHandleRowTSV(w *bufio.Writer, s []byte, n int) int { 2765 got := 0 2766 for _, b := range s { 2767 if b == '\t' { 2768 got++ 2769 } 2770 } 2771 2772 w.Write(s) 2773 writeTabs(w, n-got) 2774 return got 2775 } 2776 2777 const eraseInfo = ` 2778 erase [options...] [regexes...] 2779 2780 2781 Ignore/remove all occurrences of all regex matches along lines read from the 2782 standard input. The regular-expression mode used is "re2", which is a superset 2783 of the commonly-used "extended-mode". 2784 2785 Regexes always avoid matching any ANSI-style sequences, to avoid messing those 2786 up. Each regex erases all its occurrences on the current line in the order 2787 given among the arguments, so regex-order matters. 
2788 2789 The options are, available both in single and double-dash versions 2790 2791 -h show this help message 2792 -help show this help message 2793 2794 -i match regexes case-insensitively 2795 -ins match regexes case-insensitively 2796 ` 2797 2798 func eraseMain() { 2799 args := os.Args[1:] 2800 buffered := false 2801 insensitive := false 2802 2803 out: 2804 for len(args) > 0 { 2805 switch args[0] { 2806 case `-b`, `--b`, `-buffered`, `--buffered`: 2807 buffered = true 2808 args = args[1:] 2809 2810 case `-h`, `--h`, `-help`, `--help`: 2811 os.Stdout.WriteString(eraseInfo[1:]) 2812 return 2813 2814 case `-i`, `--i`, `-ins`, `--ins`: 2815 insensitive = true 2816 args = args[1:] 2817 2818 default: 2819 break out 2820 } 2821 } 2822 2823 if len(args) > 0 && args[0] == `--` { 2824 args = args[1:] 2825 } 2826 2827 exprs := make([]*regexp.Regexp, 0, len(args)) 2828 2829 for _, s := range args { 2830 var err error 2831 var exp *regexp.Regexp 2832 2833 if insensitive { 2834 exp, err = regexp.Compile(`(?i)` + s) 2835 } else { 2836 exp, err = regexp.Compile(s) 2837 } 2838 2839 if err != nil { 2840 os.Stderr.WriteString(err.Error()) 2841 os.Stderr.WriteString("\n") 2842 continue 2843 } 2844 2845 exprs = append(exprs, exp) 2846 } 2847 2848 // quit right away when given invalid regexes 2849 if len(exprs) < len(args) { 2850 os.Exit(1) 2851 } 2852 2853 liveLines := !buffered 2854 if !buffered { 2855 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 2856 liveLines = false 2857 } 2858 } 2859 2860 sc := bufio.NewScanner(os.Stdin) 2861 sc.Buffer(nil, maxLineBufSize) 2862 bw := bufio.NewWriterSize(os.Stdout, bufSize) 2863 defer bw.Flush() 2864 2865 var src []byte 2866 var dst []byte 2867 2868 for i := 0; sc.Scan(); i++ { 2869 s := sc.Bytes() 2870 if i == 0 && hasPrefixBOM(s) { 2871 s = s[3:] 2872 } 2873 2874 src = append(src[:0], s...) 2875 for _, e := range exprs { 2876 dst = erase(dst[:0], src, e) 2877 src = append(src[:0], dst...) 
// eraseHandleChunk appends to dst all bytes from src, except the ones which
// are part of matches of the regex given, returning the extended slice.
//
// All matches are found in a single pass over the whole chunk, instead of
// re-searching what's left after each match: the latter makes each leftover
// look like the start of the text, letting anchors such as ^ and \b match
// again where they shouldn't. Empty matches erase nothing, and can't cause
// infinite loops, since the regexp package always moves past them.
func eraseHandleChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
	// kept is where the not-yet-copied part of src starts
	kept := 0
	for _, span := range with.FindAllIndex(src, -1) {
		dst = append(dst, src[kept:span[0]]...)
		kept = span[1]
	}
	return append(dst, src[kept:]...)
}
2949 ` 2950 2951 func fixlinesMain() { 2952 buffered := false 2953 args := os.Args[1:] 2954 2955 if len(args) > 0 { 2956 switch args[0] { 2957 case `-b`, `--b`, `-buffered`, `--buffered`: 2958 buffered = true 2959 args = args[1:] 2960 2961 case `-h`, `--h`, `-help`, `--help`: 2962 os.Stdout.WriteString(fixlinesInfo[1:]) 2963 return 2964 } 2965 } 2966 2967 if len(args) > 0 && args[0] == `--` { 2968 args = args[1:] 2969 } 2970 2971 liveLines := !buffered 2972 if !buffered { 2973 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 2974 liveLines = false 2975 } 2976 } 2977 2978 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 2979 return fixLines(w, r, liveLines) 2980 }) 2981 } 2982 2983 func fixLines(w *bufio.Writer, r io.Reader, live bool) error { 2984 sc := bufio.NewScanner(r) 2985 sc.Buffer(nil, maxLineBufSize) 2986 2987 for i := 0; sc.Scan(); i++ { 2988 s := sc.Bytes() 2989 2990 // ignore leading UTF-8 BOM on the first line 2991 if i == 0 && hasPrefixBOM(s) { 2992 s = s[3:] 2993 } 2994 2995 // trim trailing spaces on the current line 2996 for len(s) > 0 && s[len(s)-1] == ' ' { 2997 s = s[:len(s)-1] 2998 } 2999 3000 w.Write(s) 3001 if w.WriteByte('\n') != nil { 3002 return errNoMoreOutput 3003 } 3004 3005 if !live { 3006 continue 3007 } 3008 3009 if err := w.Flush(); err != nil { 3010 return errNoMoreOutput 3011 } 3012 } 3013 3014 return sc.Err() 3015 } 3016 3017 const himaInfo = ` 3018 hima [options...] [regexes...] 3019 3020 3021 HIlight MAtches ANSI-styles matching regular expressions along lines read 3022 from the standard input. The regular-expression mode used is "re2", which 3023 is a superset of the commonly-used "extended-mode". 3024 3025 Regexes always avoid matching any ANSI-style sequences, to avoid messing 3026 those up. 
Also, multiple matches in a line never overlap: at each step 3027 along a line, the earliest-starting match among the regexes always wins, 3028 as the order regexes are given among the arguments never matters. 3029 3030 The options are, available both in single and double-dash versions 3031 3032 -h show this help message 3033 -help show this help message 3034 3035 -i match regexes case-insensitively 3036 -ins match regexes case-insensitively 3037 ` 3038 3039 const highlightStyle = "\x1b[7m" 3040 3041 func himaMain() { 3042 buffered := false 3043 insensitive := false 3044 args := os.Args[1:] 3045 3046 if len(args) > 0 { 3047 switch args[0] { 3048 case `-h`, `--h`, `-help`, `--help`: 3049 os.Stdout.WriteString(himaInfo[1:]) 3050 return 3051 } 3052 } 3053 3054 out: 3055 for len(args) > 0 { 3056 switch args[0] { 3057 case `-b`, `--b`, `-buffered`, `--buffered`: 3058 buffered = true 3059 args = args[1:] 3060 3061 case `-i`, `--i`, `-ins`, `--ins`: 3062 insensitive = true 3063 args = args[1:] 3064 3065 default: 3066 break out 3067 } 3068 } 3069 3070 if len(args) > 0 && args[0] == `--` { 3071 args = args[1:] 3072 } 3073 3074 exprs := make([]*regexp.Regexp, 0, len(args)) 3075 3076 for _, s := range args { 3077 var err error 3078 var exp *regexp.Regexp 3079 3080 if insensitive { 3081 exp, err = regexp.Compile(`(?i)` + s) 3082 } else { 3083 exp, err = regexp.Compile(s) 3084 } 3085 3086 if err != nil { 3087 os.Stderr.WriteString(err.Error()) 3088 os.Stderr.WriteString("\n") 3089 continue 3090 } 3091 3092 exprs = append(exprs, exp) 3093 } 3094 3095 // quit right away when given invalid regexes 3096 if len(exprs) < len(args) { 3097 os.Exit(1) 3098 } 3099 3100 liveLines := !buffered 3101 if !buffered { 3102 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 3103 liveLines = false 3104 } 3105 } 3106 3107 sc := bufio.NewScanner(os.Stdin) 3108 sc.Buffer(nil, maxLineBufSize) 3109 bw := bufio.NewWriterSize(os.Stdout, bufSize) 3110 defer bw.Flush() 3111 3112 for i := 0; 
sc.Scan(); i++ { 3113 s := sc.Bytes() 3114 if i == 0 && hasPrefixBOM(s) { 3115 s = s[3:] 3116 } 3117 3118 for len(s) > 0 { 3119 i, j := indexEscapeSequence(s) 3120 if i < 0 { 3121 himaHandleChunk(bw, s, exprs) 3122 break 3123 } 3124 if j < 0 { 3125 j = len(s) 3126 } 3127 3128 himaHandleChunk(bw, s[:i], exprs) 3129 bw.Write(s[i:j]) 3130 3131 s = s[j:] 3132 } 3133 3134 if err := bw.WriteByte('\n'); err != nil { 3135 return 3136 } 3137 3138 if !liveLines { 3139 continue 3140 } 3141 3142 if err := bw.Flush(); err != nil { 3143 return 3144 } 3145 } 3146 3147 handleError(bw, sc.Err()) 3148 } 3149 3150 // note: looking at the results of restoring ANSI-styles after style-resets 3151 // doesn't seem to be worth it, as a previous version used to do 3152 3153 // himaHandleChunk handles line-slices around any detected ANSI-style sequences, 3154 // or even whole lines, when no ANSI-styles are found in them 3155 func himaHandleChunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) { 3156 start := -1 3157 end := -1 3158 3159 for len(s) > 0 { 3160 start = -1 3161 for _, e := range with { 3162 span := e.FindIndex(s) 3163 // also ignore empty regex matches to avoid infinite outer loops, 3164 // as skipping empty slices isn't advancing at all, leaving the 3165 // string stuck to being empty-matched forever by the same regex 3166 if span == nil || span[0] == span[1] { 3167 continue 3168 } 3169 3170 if span[0] < start || start < 0 { 3171 start = span[0] 3172 end = span[1] 3173 } 3174 } 3175 3176 if start < 0 { 3177 w.Write(s) 3178 return 3179 } 3180 3181 w.Write(s[:start]) 3182 w.WriteString(highlightStyle) 3183 w.Write(s[start:end]) 3184 w.WriteString("\x1b[0m") 3185 3186 s = s[end:] 3187 } 3188 } 3189 3190 const json0Info = ` 3191 json0 [options...] [file...] 3192 3193 3194 JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output. 3195 Its output is always a single line, which ends with a line-feed. 
3196 3197 Besides minimizing bytes, this tool also adapts almost-JSON input into 3198 valid JSON, since it 3199 3200 - ignores both rest-of-line and multi-line comments 3201 - ignores extra/trailing commas in arrays and objects 3202 - turns single-quoted strings/keys into double-quoted strings 3203 - double-quotes unquoted object keys 3204 - changes \x 2-hex-digit into \u 4-hex-digit string-escapes 3205 3206 All options available can either start with a single or a double-dash 3207 3208 -h show this help message 3209 -help show this help message 3210 -jsonl emit JSON Lines, when top-level value is an array 3211 ` 3212 3213 const json0ChunkPeekSize = 16 3214 3215 func json0Main() { 3216 args := os.Args[1:] 3217 buffered := false 3218 handler := json0 3219 3220 out: 3221 for len(args) > 0 { 3222 switch args[0] { 3223 case `-b`, `--b`, `-buffered`, `--buffered`: 3224 buffered = true 3225 args = args[1:] 3226 continue 3227 3228 case `-h`, `--h`, `-help`, `--help`: 3229 os.Stdout.WriteString(json0Info[1:]) 3230 return 3231 3232 case `-jsonl`, `--jsonl`: 3233 handler = jsonl0 3234 args = args[1:] 3235 continue 3236 3237 default: 3238 break out 3239 } 3240 } 3241 3242 if len(args) > 0 && args[0] == `--` { 3243 args = args[1:] 3244 } 3245 3246 if len(args) > 1 { 3247 const msg = "multiple inputs aren't allowed\n" 3248 os.Stderr.WriteString(msg) 3249 os.Exit(1) 3250 } 3251 3252 liveLines := !buffered 3253 if !buffered { 3254 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 3255 liveLines = false 3256 } 3257 } 3258 3259 // figure out whether input should come from a named file or from stdin 3260 var paths [1]string 3261 paths[0] = `-` 3262 if len(args) > 0 { 3263 paths[0] = args[0] 3264 } 3265 3266 easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error { 3267 br := bufio.NewReaderSize(r, bufSize) 3268 defer w.Flush() 3269 return handler(w, br, liveLines) 3270 }) 3271 } 3272 3273 type handlerFunc func(w *bufio.Writer, r *bufio.Reader, live 
// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// Unwrap gives funcs errors.Is and errors.As access to the error being
// decorated, so position info never hides which error actually happened
func (lpe linePosError) Unwrap() error {
	return lpe.err
}
'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 3322 'Y': true, 'Z': true, 3323 3324 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 3325 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 3326 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 3327 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 3328 'y': true, 'z': true, 3329 } 3330 3331 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not 3332 // being 0, and normalizes letter-case for the hex letters 3333 var matchHex = [256]byte{ 3334 '0': '0', '1': '1', '2': '2', '3': '3', '4': '4', 3335 '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 3336 'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 3337 'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F', 3338 } 3339 3340 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON; final boolean 3341 // value isn't used, and is just there to match the signature of func jsonl 3342 func json0(w *bufio.Writer, r *bufio.Reader, live bool) error { 3343 jr := jsonReader{r, 1, 1} 3344 defer w.Flush() 3345 3346 if err := jr.handleLeadingJunk(); err != nil { 3347 return err 3348 } 3349 3350 // handle a single top-level JSON value 3351 err := json0HandleValue(w, &jr) 3352 3353 // end the only output-line with a line-feed; this also avoids showing 3354 // error messages on the same line as the main output, since JSON-0 3355 // output has no line-feeds before its last byte 3356 outputByte(w, '\n') 3357 3358 if err != nil { 3359 return err 3360 } 3361 return jr.handleTrailingJunk() 3362 } 3363 3364 // jsonl0 converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func 3365 // avoids writing a trailing line-feed, leaving that up to its caller 3366 func jsonl0(w *bufio.Writer, r *bufio.Reader, live bool) error { 3367 jr := jsonReader{r, 1, 1} 3368 3369 if err := jr.handleLeadingJunk(); err != nil { 3370 return err 3371 } 3372 3373 chunk, err := jr.r.Peek(1) 3374 
if err == nil && len(chunk) >= 1 { 3375 switch b := chunk[0]; b { 3376 case '[', '(': 3377 return json0HandleArrayJSONL(w, &jr, b, live) 3378 } 3379 } 3380 3381 // handle a single top-level JSON value 3382 err = json0HandleValue(w, &jr) 3383 3384 // end the only output-line with a line-feed; this also avoids showing 3385 // error messages on the same line as the main output, since JSON-0 3386 // output has no line-feeds before its last byte 3387 outputByte(w, '\n') 3388 3389 if err != nil { 3390 return err 3391 } 3392 return jr.handleTrailingJunk() 3393 } 3394 3395 // json0HandleArrayJSONL handles top-level arrays for func jsonl0 3396 func json0HandleArrayJSONL(w *bufio.Writer, jr *jsonReader, start byte, live bool) error { 3397 if err := jr.demandSyntax(start); err != nil { 3398 return err 3399 } 3400 3401 var end byte = ']' 3402 if start == '(' { 3403 end = ')' 3404 } 3405 3406 for n := 0; true; n++ { 3407 // there may be whitespace/comments before the next comma 3408 if err := jr.seekNext(); err != nil { 3409 return err 3410 } 3411 3412 // handle commas between values, as well as trailing ones 3413 comma := false 3414 b, _ := jr.peekByte() 3415 if b == ',' { 3416 jr.readByte() 3417 comma = true 3418 3419 // there may be whitespace/comments before an ending ']' 3420 if err := jr.seekNext(); err != nil { 3421 return err 3422 } 3423 b, _ = jr.peekByte() 3424 } 3425 3426 // handle end of array 3427 if b == end { 3428 jr.readByte() 3429 if n > 0 { 3430 err := outputByte(w, '\n') 3431 if live { 3432 w.Flush() 3433 } 3434 return err 3435 } 3436 return nil 3437 } 3438 3439 // turn commas between adjacent values into line-feeds, as the 3440 // output for this custom func is supposed to be JSON Lines 3441 if n > 0 { 3442 if !comma { 3443 return errNoArrayComma 3444 } 3445 if err := outputByte(w, '\n'); err != nil { 3446 return err 3447 } 3448 if live { 3449 w.Flush() 3450 } 3451 } 3452 3453 // handle the next value 3454 if err := jr.seekNext(); err != nil { 3455 return 
err 3456 } 3457 if err := json0HandleValue(w, jr); err != nil { 3458 return err 3459 } 3460 } 3461 3462 // make the compiler happy 3463 return nil 3464 } 3465 3466 // jsonReader reads data via a buffer, keeping track of the input position: 3467 // this in turn allows showing much more useful errors, when these happen 3468 type jsonReader struct { 3469 // r is the actual reader 3470 r *bufio.Reader 3471 3472 // line is the 1-based line-counter for input bytes, and gives errors 3473 // useful position info 3474 line int 3475 3476 // pos is the 1-based `horizontal` position in its line, and gives 3477 // errors useful position info 3478 pos int 3479 } 3480 3481 // improveError makes any error more useful, by giving it info about the 3482 // current input-position, as a 1-based line/within-line-position pair 3483 func (jr jsonReader) improveError(err error) error { 3484 if _, ok := err.(linePosError); ok { 3485 return err 3486 } 3487 3488 if err == io.EOF { 3489 return linePosError{jr.line, jr.pos, errInputEarlyEnd} 3490 } 3491 if err != nil { 3492 return linePosError{jr.line, jr.pos, err} 3493 } 3494 return nil 3495 } 3496 3497 func (jr *jsonReader) handleLeadingJunk() error { 3498 // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order 3499 // mark) gives no useful info if present, as UTF-8 leaves no ambiguity 3500 // about byte-order by design 3501 jr.skipUTF8BOM() 3502 3503 // ignore leading whitespace and/or comments 3504 return jr.seekNext() 3505 } 3506 3507 func (jr *jsonReader) handleTrailingJunk() error { 3508 // ignore trailing whitespace and/or comments 3509 if err := jr.seekNext(); err != nil { 3510 return err 3511 } 3512 3513 // ignore trailing semicolons 3514 for { 3515 if b, ok := jr.peekByte(); !ok || b != ';' { 3516 break 3517 } 3518 3519 jr.readByte() 3520 // ignore trailing whitespace and/or comments 3521 if err := jr.seekNext(); err != nil { 3522 return err 3523 } 3524 } 3525 3526 // beyond trailing whitespace and/or comments, any 
more bytes 3527 // make the whole input data invalid JSON 3528 if _, ok := jr.peekByte(); ok { 3529 return jr.improveError(errExtraBytes) 3530 } 3531 return nil 3532 } 3533 3534 // demandSyntax fails with an error when the next byte isn't the one given; 3535 // when it is, the byte is then read/skipped, and a nil error is returned 3536 func (jr *jsonReader) demandSyntax(syntax byte) error { 3537 chunk, err := jr.r.Peek(1) 3538 if err == io.EOF { 3539 return jr.improveError(errInputEarlyEnd) 3540 } 3541 if err != nil { 3542 return jr.improveError(err) 3543 } 3544 3545 if len(chunk) < 1 || chunk[0] != syntax { 3546 msg := `expected ` + string(rune(syntax)) 3547 return jr.improveError(errors.New(msg)) 3548 } 3549 3550 jr.readByte() 3551 return nil 3552 } 3553 3554 // updatePosInfo does what it says, given the byte just read separately 3555 func (jr *jsonReader) updatePosInfo(r rune) { 3556 if r == '\n' { 3557 jr.line += 1 3558 jr.pos = 1 3559 } else { 3560 jr.pos++ 3561 } 3562 } 3563 3564 // peekByte simplifies control-flow for various other funcs 3565 func (jr jsonReader) peekByte() (b byte, ok bool) { 3566 chunk, err := jr.r.Peek(1) 3567 if err == nil && len(chunk) >= 1 { 3568 return chunk[0], true 3569 } 3570 return 0, false 3571 } 3572 3573 // readByte does what it says, updating the reader's position info 3574 func (jr *jsonReader) readByte() (b byte, err error) { 3575 b, err = jr.r.ReadByte() 3576 if err == nil { 3577 jr.updatePosInfo(rune(b)) 3578 return b, nil 3579 } 3580 return b, jr.improveError(err) 3581 } 3582 3583 // readRune does what it says, updating the reader's position info 3584 func (jr *jsonReader) readRune() (r rune, err error) { 3585 r, _, err = jr.r.ReadRune() 3586 if err == nil { 3587 jr.updatePosInfo(r) 3588 return r, nil 3589 } 3590 return r, jr.improveError(err) 3591 } 3592 3593 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols 3594 // and comments, either single-line (starting with //) or general (starting 3595 // 
with /* and ending with */) 3596 func (jr *jsonReader) seekNext() error { 3597 for { 3598 b, ok := jr.peekByte() 3599 if !ok { 3600 return nil 3601 } 3602 3603 // case ' ', '\t', '\f', '\v', '\r', '\n': 3604 if b <= 32 { 3605 // keep skipping whitespace bytes 3606 jr.readByte() 3607 continue 3608 } 3609 3610 if b == '#' { 3611 if err := jr.skipLine(); err != nil { 3612 return err 3613 } 3614 continue 3615 } 3616 3617 if b != '/' { 3618 // reached the next token 3619 return nil 3620 } 3621 3622 if err := jr.skipComment(); err != nil { 3623 return err 3624 } 3625 3626 // after comments, keep looking for more whitespace and/or comments 3627 } 3628 } 3629 3630 // skipComment helps func seekNext skip over comments, simplifying the latter 3631 // func's control-flow 3632 func (jr *jsonReader) skipComment() error { 3633 err := jr.demandSyntax('/') 3634 if err != nil { 3635 return err 3636 } 3637 3638 b, ok := jr.peekByte() 3639 if !ok { 3640 return nil 3641 } 3642 3643 switch b { 3644 case '/': 3645 // handle single-line comments 3646 return jr.skipLine() 3647 3648 case '*': 3649 // handle (potentially) multi-line comments 3650 return jr.skipGeneralComment() 3651 3652 default: 3653 return jr.improveError(errInvalidComment) 3654 } 3655 } 3656 3657 // skipLine handles single-line comments for func skipComment 3658 func (jr *jsonReader) skipLine() error { 3659 for { 3660 b, err := jr.readByte() 3661 if err == io.EOF { 3662 // end of input is fine in this case 3663 return nil 3664 } 3665 if err != nil { 3666 return err 3667 } 3668 3669 if b == '\n' { 3670 return nil 3671 } 3672 } 3673 } 3674 3675 // skipGeneralComment handles (potentially) multi-line comments for func 3676 // skipComment 3677 func (jr *jsonReader) skipGeneralComment() error { 3678 var prev byte 3679 for { 3680 b, err := jr.readByte() 3681 if err != nil { 3682 return jr.improveError(errCommentEarlyEnd) 3683 } 3684 3685 if prev == '*' && b == '/' { 3686 return nil 3687 } 3688 if b == '\n' { 3689 jr.line++ 3690 
} 3691 prev = b 3692 } 3693 } 3694 3695 // skipUTF8BOM does what it says, if a UTF-8 BOM is present 3696 func (jr *jsonReader) skipUTF8BOM() { 3697 lead, err := jr.r.Peek(3) 3698 if err != nil { 3699 return 3700 } 3701 3702 if len(lead) > 2 && lead[0] == 0xef && lead[1] == 0xbb && lead[2] == 0xbf { 3703 jr.readByte() 3704 jr.readByte() 3705 jr.readByte() 3706 jr.pos += 3 3707 } 3708 } 3709 3710 // outputByte is a small wrapper on func WriteByte, which adapts any error 3711 // into a custom dummy output-error, which is in turn meant to be ignored, 3712 // being just an excuse to quit the app immediately and successfully 3713 func outputByte(w *bufio.Writer, b byte) error { 3714 err := w.WriteByte(b) 3715 if err == nil { 3716 return nil 3717 } 3718 return errNoMoreOutput 3719 } 3720 3721 func json0HandleArray(w *bufio.Writer, jr *jsonReader, start byte) error { 3722 if err := jr.demandSyntax(start); err != nil { 3723 return err 3724 } 3725 3726 var end byte = ']' 3727 if start == '(' { 3728 end = ')' 3729 } 3730 3731 w.WriteByte('[') 3732 3733 for n := 0; true; n++ { 3734 // there may be whitespace/comments before the next comma 3735 if err := jr.seekNext(); err != nil { 3736 return err 3737 } 3738 3739 // handle commas between values, as well as trailing ones 3740 comma := false 3741 b, _ := jr.peekByte() 3742 if b == ',' { 3743 jr.readByte() 3744 comma = true 3745 3746 // there may be whitespace/comments before an ending ']' 3747 if err := jr.seekNext(); err != nil { 3748 return err 3749 } 3750 b, _ = jr.peekByte() 3751 } 3752 3753 // handle end of array 3754 if b == end { 3755 jr.readByte() 3756 w.WriteByte(']') 3757 return nil 3758 } 3759 3760 // don't forget commas between adjacent values 3761 if n > 0 { 3762 if !comma { 3763 return errNoArrayComma 3764 } 3765 if err := outputByte(w, ','); err != nil { 3766 return err 3767 } 3768 } 3769 3770 // handle the next value 3771 if err := jr.seekNext(); err != nil { 3772 return err 3773 } 3774 if err := 
json0HandleValue(w, jr); err != nil { 3775 return err 3776 } 3777 } 3778 3779 // make the compiler happy 3780 return nil 3781 } 3782 3783 // json0HandleDigits helps various number-handling funcs do their job 3784 func json0HandleDigits(w *bufio.Writer, jr *jsonReader) error { 3785 if json0TrySimpleDigits(w, jr) { 3786 return nil 3787 } 3788 3789 for n := 0; true; n++ { 3790 b, _ := jr.peekByte() 3791 3792 // support `nice` long numbers by ignoring their underscores 3793 if b == '_' { 3794 jr.readByte() 3795 continue 3796 } 3797 3798 if '0' <= b && b <= '9' { 3799 jr.readByte() 3800 w.WriteByte(b) 3801 continue 3802 } 3803 3804 if n == 0 { 3805 return errNoDigits 3806 } 3807 return nil 3808 } 3809 3810 // make the compiler happy 3811 return nil 3812 } 3813 3814 // json0TrySimpleDigits tries to handle (more quickly) digit-runs where all 3815 // bytes are just digits: this is a very common case for numbers; returns 3816 // whether it succeeded, so this func's caller knows knows if it needs to do 3817 // anything, the slower way 3818 func json0TrySimpleDigits(w *bufio.Writer, jr *jsonReader) (gotIt bool) { 3819 chunk, _ := jr.r.Peek(json0ChunkPeekSize) 3820 3821 for i, b := range chunk { 3822 if '0' <= b && b <= '9' { 3823 continue 3824 } 3825 3826 if i == 0 || b == '_' { 3827 return false 3828 } 3829 3830 // bulk-writing the chunk is this func's whole point 3831 w.Write(chunk[:i]) 3832 3833 jr.r.Discard(i) 3834 jr.pos += i 3835 return true 3836 } 3837 3838 // maybe the digits-run is ok, but it's just longer than the chunk 3839 return false 3840 } 3841 3842 // json0HandleDot handles pseudo-JSON numbers which start with a decimal dot 3843 func json0HandleDot(w *bufio.Writer, jr *jsonReader) error { 3844 if err := jr.demandSyntax('.'); err != nil { 3845 return err 3846 } 3847 w.Write([]byte{'0', '.'}) 3848 return json0HandleDigits(w, jr) 3849 } 3850 3851 // json0HandleKey is used by func json0HandleObjects and generalizes func 3852 // json0HandleString, by allowing 
unquoted object keys; it's not used anywhere 3853 // else, as allowing unquoted string values is ambiguous with the actual 3854 // JSON-keyword values null, false, and true. 3855 func json0HandleKey(w *bufio.Writer, jr *jsonReader) error { 3856 quote, ok := jr.peekByte() 3857 if !ok { 3858 return jr.improveError(errStringEarlyEnd) 3859 } 3860 3861 if quote == '"' || quote == '\'' { 3862 return json0HandleString(w, jr, rune(quote)) 3863 } 3864 3865 w.WriteByte('"') 3866 for { 3867 if b, _ := jr.peekByte(); isIdentifier[b] { 3868 jr.readByte() 3869 w.WriteByte(b) 3870 continue 3871 } 3872 3873 w.WriteByte('"') 3874 return nil 3875 } 3876 } 3877 3878 // json0TrySimpleString tries to handle (more quickly) inner-strings where all 3879 // bytes are unescaped ASCII symbols: this is a very common case for strings, 3880 // and is almost always the case for object keys; returns whether it succeeded, 3881 // so this func's caller knows knows if it needs to do anything, the slower way 3882 func json0TrySimpleString(w *bufio.Writer, jr *jsonReader, quote rune) (gotIt bool) { 3883 end := -1 3884 chunk, _ := jr.r.Peek(json0ChunkPeekSize) 3885 3886 for i, b := range chunk { 3887 if 32 <= b && b <= 127 && b != '\\' && b != '\'' && b != '"' { 3888 continue 3889 } 3890 3891 if b == byte(quote) { 3892 end = i 3893 break 3894 } 3895 return false 3896 } 3897 3898 if end < 0 { 3899 return false 3900 } 3901 3902 // bulk-writing the chunk is this func's whole point 3903 w.WriteByte('"') 3904 w.Write(chunk) 3905 w.WriteByte('"') 3906 3907 jr.r.Discard(end + 1) 3908 jr.pos += end + 1 3909 return true 3910 } 3911 3912 // json0HandleKeyword is used by funcs json0HandleFalse, json0HandleNull, and 3913 // json0HandleTrue 3914 func json0HandleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error { 3915 for rest := kw; len(rest) > 0; rest = rest[1:] { 3916 b, err := jr.readByte() 3917 if err == nil && b == rest[0] { 3918 // keywords given to this func have no line-feeds 3919 jr.pos++ 3920 
continue 3921 } 3922 3923 msg := `expected JSON value ` + string(kw) 3924 return jr.improveError(errors.New(msg)) 3925 } 3926 3927 w.Write(kw) 3928 return nil 3929 } 3930 3931 // json0HandleNegative handles numbers starting with a negative sign for func 3932 // json0HandleValue 3933 func json0HandleNegative(w *bufio.Writer, jr *jsonReader) error { 3934 if err := jr.demandSyntax('-'); err != nil { 3935 return err 3936 } 3937 3938 w.WriteByte('-') 3939 if b, _ := jr.peekByte(); b == '.' { 3940 jr.readByte() 3941 w.Write([]byte{'0', '.'}) 3942 return json0HandleDigits(w, jr) 3943 } 3944 return json0HandleNumber(w, jr) 3945 } 3946 3947 // json0HandleNumber handles numeric values/tokens, including invalid-JSON 3948 // cases, such as values starting with a decimal dot 3949 func json0HandleNumber(w *bufio.Writer, jr *jsonReader) error { 3950 // handle integer digits 3951 if err := json0HandleDigits(w, jr); err != nil { 3952 return err 3953 } 3954 3955 // handle optional decimal digits, starting with a leading dot 3956 if b, _ := jr.peekByte(); b == '.' 
{ 3957 jr.readByte() 3958 w.WriteByte('.') 3959 return json0HandleDigits(w, jr) 3960 } 3961 3962 // handle optional exponent digits 3963 if b, _ := jr.peekByte(); b == 'e' || b == 'E' { 3964 jr.readByte() 3965 w.WriteByte(b) 3966 b, _ = jr.peekByte() 3967 if b == '+' { 3968 jr.readByte() 3969 } else if b == '-' { 3970 w.WriteByte('-') 3971 jr.readByte() 3972 } 3973 return json0HandleDigits(w, jr) 3974 } 3975 3976 return nil 3977 } 3978 3979 func json0HandleObject(w *bufio.Writer, jr *jsonReader) error { 3980 if err := jr.demandSyntax('{'); err != nil { 3981 return err 3982 } 3983 w.WriteByte('{') 3984 3985 for npairs := 0; true; npairs++ { 3986 // there may be whitespace/comments before the next comma 3987 if err := jr.seekNext(); err != nil { 3988 return err 3989 } 3990 3991 // handle commas between key-value pairs, as well as trailing ones 3992 comma := false 3993 b, _ := jr.peekByte() 3994 if b == ',' { 3995 jr.readByte() 3996 comma = true 3997 3998 // there may be whitespace/comments before an ending '}' 3999 if err := jr.seekNext(); err != nil { 4000 return err 4001 } 4002 b, _ = jr.peekByte() 4003 } 4004 4005 // handle end of object 4006 if b == '}' { 4007 jr.readByte() 4008 w.WriteByte('}') 4009 return nil 4010 } 4011 4012 // don't forget commas between adjacent key-value pairs 4013 if npairs > 0 { 4014 if !comma { 4015 return errNoObjectComma 4016 } 4017 if err := outputByte(w, ','); err != nil { 4018 return err 4019 } 4020 } 4021 4022 // handle the next pair's key 4023 if err := jr.seekNext(); err != nil { 4024 return err 4025 } 4026 if err := json0HandleKey(w, jr); err != nil { 4027 return err 4028 } 4029 4030 // demand a colon right after the key 4031 if err := jr.seekNext(); err != nil { 4032 return err 4033 } 4034 if err := jr.demandSyntax(':'); err != nil { 4035 return err 4036 } 4037 w.WriteByte(':') 4038 4039 // handle the next pair's value 4040 if err := jr.seekNext(); err != nil { 4041 return err 4042 } 4043 if err := json0HandleValue(w, jr); err 
!= nil { 4044 return err 4045 } 4046 } 4047 4048 // make the compiler happy 4049 return nil 4050 } 4051 4052 // json0HandlePositive handles numbers starting with a positive sign for func 4053 // json0HandleValue 4054 func json0HandlePositive(w *bufio.Writer, jr *jsonReader) error { 4055 if err := jr.demandSyntax('+'); err != nil { 4056 return err 4057 } 4058 4059 // valid JSON isn't supposed to have leading pluses on numbers, so 4060 // emit nothing for it, unlike for negative numbers 4061 4062 if b, _ := jr.peekByte(); b == '.' { 4063 jr.readByte() 4064 w.Write([]byte{'0', '.'}) 4065 return json0HandleDigits(w, jr) 4066 } 4067 return json0HandleNumber(w, jr) 4068 } 4069 4070 // json0HandleString handles strings for func json0HandleValue, and supports 4071 // both single-quotes and double-quotes, always emitting the latter in the 4072 // output, of course 4073 func json0HandleString(w *bufio.Writer, jr *jsonReader, quote rune) error { 4074 if quote != '"' && quote != '\'' { 4075 return errNoStringQuote 4076 } 4077 4078 jr.readByte() 4079 4080 // try the quicker no-escapes ASCII handler 4081 if json0TrySimpleString(w, jr, quote) { 4082 return nil 4083 } 4084 4085 // it's a non-trivial inner-string, so handle it byte-by-byte 4086 w.WriteByte('"') 4087 escaped := false 4088 4089 for { 4090 r, err := jr.readRune() 4091 if r == unicode.ReplacementChar { 4092 return jr.improveError(errInvalidRune) 4093 } 4094 if err != nil { 4095 if err == io.EOF { 4096 return jr.improveError(errStringEarlyEnd) 4097 } 4098 return jr.improveError(err) 4099 } 4100 4101 if !escaped { 4102 if r == '\\' { 4103 escaped = true 4104 continue 4105 } 4106 4107 // handle end of string 4108 if r == quote { 4109 return outputByte(w, '"') 4110 } 4111 4112 if r <= 127 { 4113 w.Write(escapedStringBytes[byte(r)]) 4114 } else { 4115 w.WriteRune(r) 4116 } 4117 continue 4118 } 4119 4120 // handle escaped items 4121 escaped = false 4122 4123 switch r { 4124 case 'u': 4125 // \u needs exactly 4 hex-digits to 
follow it 4126 w.Write([]byte{'\\', 'u'}) 4127 if err := json0CopyHex(w, 4, jr); err != nil { 4128 return jr.improveError(err) 4129 } 4130 4131 case 'x': 4132 // JSON only supports 4 escaped hex-digits, so pad the 2 4133 // expected hex-digits with 2 zeros 4134 w.Write([]byte{'\\', 'u', '0', '0'}) 4135 if err := json0CopyHex(w, 2, jr); err != nil { 4136 return jr.improveError(err) 4137 } 4138 4139 case 't', 'f', 'r', 'n', 'b', '\\', '"': 4140 // handle valid-JSON escaped string sequences 4141 w.WriteByte('\\') 4142 w.WriteByte(byte(r)) 4143 4144 case '\'': 4145 // escaped single-quotes aren't standard JSON, but they can 4146 // be handy when the input uses non-standard single-quoted 4147 // strings 4148 w.WriteByte('\'') 4149 4150 default: 4151 if r <= 127 { 4152 w.Write(escapedStringBytes[byte(r)]) 4153 } else { 4154 w.WriteRune(r) 4155 } 4156 } 4157 } 4158 } 4159 4160 // json0CopyHex handles a run of hex-digits for func json0HandleString, starting 4161 // right after the leading `\u` (or `\x`) part; this func doesn't `improve` 4162 // its errors with position info: that's up to the caller 4163 func json0CopyHex(w *bufio.Writer, n int, jr *jsonReader) error { 4164 for i := 0; i < n; i++ { 4165 b, err := jr.readByte() 4166 if err == io.EOF { 4167 return errStringEarlyEnd 4168 } 4169 if err != nil { 4170 return err 4171 } 4172 4173 if b >= 128 { 4174 return errInvalidHex 4175 } 4176 4177 if b := matchHex[b]; b != 0 { 4178 w.WriteByte(b) 4179 continue 4180 } 4181 4182 return errInvalidHex 4183 } 4184 4185 return nil 4186 } 4187 4188 // json0HandleValue is a generic JSON-token handler, which allows the recursive 4189 // behavior to handle any kind of JSON/pseudo-JSON input 4190 func json0HandleValue(w *bufio.Writer, jr *jsonReader) error { 4191 chunk, err := jr.r.Peek(1) 4192 if err == nil && len(chunk) >= 1 { 4193 return json0HandleValueDispatch(w, jr, chunk[0]) 4194 } 4195 4196 if err == io.EOF { 4197 return jr.improveError(errInputEarlyEnd) 4198 } 4199 return 
jr.improveError(errInputEarlyEnd) 4200 } 4201 4202 // json0HandleValueDispatch simplifies control-flow for func json0HandleValue 4203 func json0HandleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error { 4204 switch b { 4205 case 'f': 4206 return json0HandleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'}) 4207 case 'n': 4208 return json0HandleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'}) 4209 case 't': 4210 return json0HandleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'}) 4211 case 'F': 4212 return json0HandleKeyword(w, jr, []byte{'F', 'a', 'l', 's', 'e'}) 4213 case 'N': 4214 return json0HandleKeyword(w, jr, []byte{'N', 'o', 'n', 'e'}) 4215 case 'T': 4216 return json0HandleKeyword(w, jr, []byte{'T', 'r', 'u', 'e'}) 4217 case '.': 4218 return json0HandleDot(w, jr) 4219 case '+': 4220 return json0HandlePositive(w, jr) 4221 case '-': 4222 return json0HandleNegative(w, jr) 4223 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 4224 return json0HandleNumber(w, jr) 4225 case '\'', '"': 4226 return json0HandleString(w, jr, rune(b)) 4227 case '[', '(': 4228 return json0HandleArray(w, jr, b) 4229 case '{': 4230 return json0HandleObject(w, jr) 4231 default: 4232 return jr.improveError(errInvalidToken) 4233 } 4234 } 4235 4236 // escapedStringBytes helps func json0HandleString treat string bytes quickly 4237 // and correctly, using their officially-supported JSON escape sequences 4238 // 4239 // https://www.rfc-editor.org/rfc/rfc8259#section-7 4240 var escapedStringBytes = [256][]byte{ 4241 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 4242 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 4243 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 4244 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 4245 {'\\', 'b'}, {'\\', 't'}, 4246 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 4247 {'\\', 'f'}, {'\\', 'r'}, 4248 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 4249 {'\\', 'u', '0', '0', 
'1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 4250 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 4251 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 4252 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 4253 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 4254 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 4255 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 4256 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 4257 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 4258 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 4259 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 4260 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 4261 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 4262 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 4263 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 4264 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 4265 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 4266 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 4267 {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119}, 4268 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 4269 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 4270 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 4271 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 4272 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 4273 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 4274 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 4275 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 4276 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 4277 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 4278 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 4279 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 4280 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 4281 {224}, {225}, {226}, {227}, {228}, {229}, 
{230}, {231}, 4282 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 4283 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 4284 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 4285 } 4286 4287 const json2Info = ` 4288 json2 [filepath...] 4289 4290 JSON-2 indents valid JSON input into multi-line JSON which uses 2 spaces for 4291 each indentation level. 4292 ` 4293 4294 func json2Main() { 4295 args := os.Args[1:] 4296 4297 if len(args) > 0 { 4298 switch args[0] { 4299 case `-h`, `--h`, `-help`, `--help`: 4300 os.Stdout.WriteString(himaInfo[1:]) 4301 return 4302 } 4303 } 4304 4305 if len(args) > 0 && args[0] == `--` { 4306 args = args[1:] 4307 } 4308 4309 if len(args) > 1 { 4310 handleError(nil, errors.New(`multiple inputs not allowed`)) 4311 return 4312 } 4313 4314 // figure out whether input should come from a named file or from stdin 4315 var paths [1]string 4316 paths[0] = `-` 4317 if len(args) > 0 { 4318 paths[0] = args[0] 4319 } 4320 4321 easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error { 4322 return json2(w, r) 4323 }) 4324 } 4325 4326 func json2(w *bufio.Writer, r io.Reader) error { 4327 dec := json.NewDecoder(r) 4328 // avoid parsing numbers, so unusually-long numbers are kept verbatim, 4329 // even if JSON parsers aren't required to guarantee such input-fidelity 4330 // for numbers 4331 dec.UseNumber() 4332 4333 t, err := dec.Token() 4334 if err == io.EOF { 4335 return errors.New(`input has no JSON values`) 4336 } 4337 4338 if err = json2HandleToken(w, dec, t, 0, 0); err != nil { 4339 return err 4340 } 4341 // don't forget ending the last line for the last value 4342 w.WriteByte('\n') 4343 4344 _, err = dec.Token() 4345 if err == io.EOF { 4346 // input is over, so it's a success 4347 return nil 4348 } 4349 4350 if err == nil { 4351 // a successful `read` is a failure, as it means there are 4352 // trailing JSON tokens 4353 return errors.New(`unexpected trailing data`) 4354 } 4355 4356 // any other error, perhaps 
some invalid-JSON-syntax-type error 4357 return err 4358 } 4359 4360 // json2HandleToken handles recursion for func json2 4361 func json2HandleToken(w *bufio.Writer, dec *json.Decoder, t json.Token, pre, level int) error { 4362 switch t := t.(type) { 4363 case json.Delim: 4364 switch t { 4365 case json.Delim('['): 4366 return json2HandleArray(w, dec, pre, level) 4367 case json.Delim('{'): 4368 return json2HandleObject(w, dec, pre, level) 4369 default: 4370 return errors.New(`unsupported JSON syntax ` + string(t)) 4371 } 4372 4373 case nil: 4374 writeSpaces(w, 2*pre) 4375 w.WriteString(`null`) 4376 return nil 4377 4378 case bool: 4379 writeSpaces(w, 2*pre) 4380 if t { 4381 w.WriteString(`true`) 4382 } else { 4383 w.WriteString(`false`) 4384 } 4385 return nil 4386 4387 case json.Number: 4388 writeSpaces(w, 2*pre) 4389 w.WriteString(t.String()) 4390 return nil 4391 4392 case string: 4393 return json2HandleString(w, t, pre) 4394 4395 default: 4396 // return fmt.Errorf(`unsupported token type %T`, t) 4397 return errors.New(`invalid JSON token`) 4398 } 4399 } 4400 4401 func json2HandleArray(w *bufio.Writer, dec *json.Decoder, pre, level int) error { 4402 for i := 0; true; i++ { 4403 t, err := dec.Token() 4404 if err != nil { 4405 return err 4406 } 4407 4408 if t == json.Delim(']') { 4409 if i == 0 { 4410 writeSpaces(w, 2*pre) 4411 w.WriteByte('[') 4412 w.WriteByte(']') 4413 } else { 4414 w.WriteByte('\n') 4415 writeSpaces(w, 2*level) 4416 w.WriteByte(']') 4417 } 4418 return nil 4419 } 4420 4421 if i == 0 { 4422 writeSpaces(w, 2*pre) 4423 w.WriteByte('[') 4424 w.WriteByte('\n') 4425 } else { 4426 w.WriteByte(',') 4427 w.WriteByte('\n') 4428 if err := w.Flush(); err != nil { 4429 // a write error may be the consequence of stdout being closed, 4430 // perhaps by another app along a pipe 4431 return errNoMoreOutput 4432 } 4433 } 4434 4435 err = json2HandleToken(w, dec, t, level+1, level+1) 4436 if err != nil { 4437 return err 4438 } 4439 } 4440 4441 // make the compiler 
happy 4442 return nil 4443 } 4444 4445 func json2HandleObject(w *bufio.Writer, dec *json.Decoder, pre, level int) error { 4446 for i := 0; true; i++ { 4447 t, err := dec.Token() 4448 if err != nil { 4449 return err 4450 } 4451 4452 if t == json.Delim('}') { 4453 if i == 0 { 4454 writeSpaces(w, 2*pre) 4455 w.WriteByte('{') 4456 w.WriteByte('}') 4457 } else { 4458 w.WriteByte('\n') 4459 writeSpaces(w, 2*level) 4460 w.WriteByte('}') 4461 } 4462 return nil 4463 } 4464 4465 if i == 0 { 4466 writeSpaces(w, 2*pre) 4467 w.WriteByte('{') 4468 w.WriteByte('\n') 4469 } else { 4470 w.WriteByte(',') 4471 w.WriteByte('\n') 4472 if err := w.Flush(); err != nil { 4473 // a write error may be the consequence of stdout being closed, 4474 // perhaps by another app along a pipe 4475 return errNoMoreOutput 4476 } 4477 } 4478 4479 k, ok := t.(string) 4480 if !ok { 4481 return errors.New(`expected a string for a key-value pair`) 4482 } 4483 4484 err = json2HandleString(w, k, level+1) 4485 if err != nil { 4486 return err 4487 } 4488 4489 w.WriteString(": ") 4490 4491 t, err = dec.Token() 4492 if err == io.EOF { 4493 return errors.New(`expected a value for a key-value pair`) 4494 } 4495 4496 err = json2HandleToken(w, dec, t, 0, level+1) 4497 if err != nil { 4498 return err 4499 } 4500 } 4501 4502 // make the compiler happy 4503 return nil 4504 } 4505 4506 func json2HandleString(w *bufio.Writer, s string, level int) error { 4507 writeSpaces(w, 2*level) 4508 w.WriteByte('"') 4509 for i := range s { 4510 w.Write(escapedStringBytes[s[i]]) 4511 } 4512 w.WriteByte('"') 4513 return nil 4514 } 4515 4516 const jsonlInfo = ` 4517 jsonl [options...] [filepath...] 4518 4519 JSON Lines turns valid JSON-input arrays into separate JSON lines, one for 4520 each top-level item. Non-arrays result in a single JSON-line. 4521 4522 When not given a filepath to load, standard input is used instead. Every 4523 output line is always a single top-level item from the input. 
4524 ` 4525 4526 func jsonlMain() { 4527 buffered := false 4528 args := os.Args[1:] 4529 4530 if len(args) > 0 { 4531 switch args[0] { 4532 case `-b`, `--b`, `-buffered`, `--buffered`: 4533 buffered = true 4534 args = args[1:] 4535 4536 case `-h`, `--h`, `-help`, `--help`: 4537 os.Stdout.WriteString(plainInfo[1:]) 4538 return 4539 } 4540 } 4541 4542 if len(args) > 0 && args[0] == `--` { 4543 args = args[1:] 4544 } 4545 4546 liveLines := !buffered 4547 if !buffered { 4548 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 4549 liveLines = false 4550 } 4551 } 4552 4553 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 4554 return jsonl(w, r, liveLines) 4555 }) 4556 } 4557 4558 func jsonl(w *bufio.Writer, r io.Reader, live bool) error { 4559 dec := json.NewDecoder(r) 4560 // avoid parsing numbers, so unusually-long numbers are kept verbatim, 4561 // even if JSON parsers aren't required to guarantee such input-fidelity 4562 // for numbers 4563 dec.UseNumber() 4564 4565 t, err := dec.Token() 4566 if err == io.EOF { 4567 // return errors.New(`input has no JSON values`) 4568 return nil 4569 } 4570 4571 if t == json.Delim('[') { 4572 if err := jsonlHandleTopLevelArray(w, dec, live); err != nil { 4573 return err 4574 } 4575 } else { 4576 if err := jsonlHandleToken(w, dec, t); err != nil { 4577 return err 4578 } 4579 w.WriteByte('\n') 4580 if live { 4581 w.Flush() 4582 } 4583 } 4584 4585 _, err = dec.Token() 4586 if err == io.EOF { 4587 // input is over, so it's a success 4588 return nil 4589 } 4590 4591 if err == nil { 4592 // a successful `read` is a failure, as it means there are 4593 // trailing JSON tokens 4594 return errors.New(`unexpected trailing data`) 4595 } 4596 4597 // any other error, perhaps some invalid-JSON-syntax-type error 4598 return err 4599 } 4600 4601 func jsonlHandleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error { 4602 switch t := t.(type) { 4603 case json.Delim: 4604 switch t { 4605 case json.Delim('['): 
4606 return jsonlHandleArray(w, dec) 4607 case json.Delim('{'): 4608 return jsonlHandleObject(w, dec) 4609 default: 4610 return errors.New(`unsupported JSON syntax ` + string(t)) 4611 } 4612 4613 case nil: 4614 w.WriteString(`null`) 4615 return nil 4616 4617 case bool: 4618 if t { 4619 w.WriteString(`true`) 4620 } else { 4621 w.WriteString(`false`) 4622 } 4623 return nil 4624 4625 case json.Number: 4626 w.WriteString(t.String()) 4627 return nil 4628 4629 case string: 4630 return jsonlHandleString(w, t) 4631 4632 default: 4633 // return fmt.Errorf(`unsupported token type %T`, t) 4634 return errors.New(`invalid JSON token`) 4635 } 4636 } 4637 4638 func jsonlHandleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error { 4639 for i := 0; true; i++ { 4640 t, err := dec.Token() 4641 if err == io.EOF { 4642 return nil 4643 } 4644 4645 if err != nil { 4646 return err 4647 } 4648 4649 if t == json.Delim(']') { 4650 return nil 4651 } 4652 4653 err = jsonlHandleToken(w, dec, t) 4654 if err != nil { 4655 return err 4656 } 4657 4658 w.WriteByte('\n') 4659 if !live { 4660 continue 4661 } 4662 4663 if err := w.Flush(); err != nil { 4664 // a write error may be the consequence of stdout being closed, 4665 // perhaps by another app along a pipe 4666 return errNoMoreOutput 4667 } 4668 } 4669 4670 // make the compiler happy 4671 return nil 4672 } 4673 4674 func jsonlHandleArray(w *bufio.Writer, dec *json.Decoder) error { 4675 w.WriteByte('[') 4676 4677 for i := 0; true; i++ { 4678 t, err := dec.Token() 4679 if err == io.EOF { 4680 w.WriteByte(']') 4681 return nil 4682 } 4683 4684 if err != nil { 4685 return err 4686 } 4687 4688 if t == json.Delim(']') { 4689 w.WriteByte(']') 4690 return nil 4691 } 4692 4693 if i > 0 { 4694 _, err := w.WriteString(", ") 4695 if err != nil { 4696 return errNoMoreOutput 4697 } 4698 } 4699 4700 err = jsonlHandleToken(w, dec, t) 4701 if err != nil { 4702 return err 4703 } 4704 } 4705 4706 // make the compiler happy 4707 return nil 4708 } 4709 
4710 func jsonlHandleObject(w *bufio.Writer, dec *json.Decoder) error { 4711 w.WriteByte('{') 4712 4713 for i := 0; true; i++ { 4714 t, err := dec.Token() 4715 if err == io.EOF { 4716 w.WriteByte('}') 4717 return nil 4718 } 4719 4720 if err != nil { 4721 return err 4722 } 4723 4724 if t == json.Delim('}') { 4725 w.WriteByte('}') 4726 return nil 4727 } 4728 4729 if i > 0 { 4730 _, err := w.WriteString(", ") 4731 if err != nil { 4732 return errNoMoreOutput 4733 } 4734 } 4735 4736 k, ok := t.(string) 4737 if !ok { 4738 return errors.New(`expected a string for a key-value pair`) 4739 } 4740 4741 err = jsonlHandleString(w, k) 4742 if err != nil { 4743 return err 4744 } 4745 4746 w.WriteString(": ") 4747 4748 t, err = dec.Token() 4749 if err == io.EOF { 4750 return errors.New(`expected a value for a key-value pair`) 4751 } 4752 4753 err = jsonlHandleToken(w, dec, t) 4754 if err != nil { 4755 return err 4756 } 4757 } 4758 4759 // make the compiler happy 4760 return nil 4761 } 4762 4763 func jsonlHandleString(w *bufio.Writer, s string) error { 4764 w.WriteByte('"') 4765 for i := range s { 4766 w.Write(escapedStringBytes[s[i]]) 4767 } 4768 w.WriteByte('"') 4769 return nil 4770 } 4771 4772 const jsonsInfo = ` 4773 jsons [options...] [filenames...] 4774 4775 JSON Strings turns TSV (tab-separated values) data into a JSON array of 4776 objects whose values are strings or nulls, the latter being used for 4777 missing trailing values. 
4778 ` 4779 4780 type jsonsRunConfig struct { 4781 lines int 4782 keys []string 4783 live bool 4784 } 4785 4786 func jsonsMain() { 4787 buffered := false 4788 args := os.Args[1:] 4789 4790 if len(args) > 0 { 4791 switch args[0] { 4792 case `-b`, `--b`, `-buffered`, `--buffered`: 4793 buffered = true 4794 args = args[1:] 4795 4796 case `-h`, `--h`, `-help`, `--help`: 4797 os.Stdout.WriteString(himaInfo[1:]) 4798 return 4799 } 4800 } 4801 4802 if len(args) > 0 && args[0] == `--` { 4803 args = args[1:] 4804 } 4805 4806 var cfg jsonsRunConfig 4807 cfg.live = !buffered 4808 if !buffered { 4809 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 4810 cfg.live = false 4811 } 4812 } 4813 4814 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 4815 return jsons(w, r, &cfg) 4816 }) 4817 4818 if cfg.lines < 2 { 4819 os.Stdout.WriteString("[]\n") 4820 } else { 4821 os.Stdout.WriteString("\n]\n") 4822 } 4823 } 4824 4825 func jsons(w *bufio.Writer, r io.Reader, cfg *jsonsRunConfig) error { 4826 sc := bufio.NewScanner(r) 4827 sc.Buffer(nil, maxLineBufSize) 4828 4829 for i := 0; sc.Scan(); i++ { 4830 s := sc.Text() 4831 if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { 4832 s = s[3:] 4833 } 4834 4835 if cfg.lines == 0 { 4836 cfg.keys = jsonsEscapeKeys(s) 4837 cfg.lines++ 4838 continue 4839 } 4840 4841 if cfg.lines == 1 { 4842 w.WriteString("[\n ") 4843 } else { 4844 if _, err := w.WriteString(",\n "); err != nil { 4845 return errNoMoreOutput 4846 } 4847 } 4848 4849 jsonsEmitRow(w, s, cfg.keys) 4850 cfg.lines++ 4851 4852 if !cfg.live { 4853 continue 4854 } 4855 4856 if err := w.Flush(); err != nil { 4857 return errNoMoreOutput 4858 } 4859 } 4860 4861 return sc.Err() 4862 } 4863 4864 func jsonsEmitRow(w *bufio.Writer, line string, keys []string) { 4865 j := 0 4866 w.WriteByte('{') 4867 4868 loopTSV(line, func(i int, s string) { 4869 j = i 4870 if i > 0 { 4871 w.WriteString(", ") 4872 } 4873 4874 w.WriteString(keys[i]) 4875 w.WriteString(": \"") 4876 
4877 for _, r := range s { 4878 if r == '\\' || r == '"' { 4879 w.WriteByte('\\') 4880 } 4881 w.WriteRune(r) 4882 } 4883 w.WriteByte('"') 4884 }) 4885 4886 for i := j + 1; i < len(keys); i++ { 4887 if i > 0 { 4888 w.WriteString(", ") 4889 } 4890 w.WriteString(keys[i]) 4891 w.WriteString(": null") 4892 } 4893 w.WriteByte('}') 4894 } 4895 4896 func jsonsEscapeKeys(line string) []string { 4897 var keys []string 4898 var sb strings.Builder 4899 4900 loopTSV(line, func(i int, s string) { 4901 sb.WriteByte('"') 4902 for _, r := range s { 4903 if r == '\\' || r == '"' { 4904 sb.WriteByte('\\') 4905 } 4906 sb.WriteRune(r) 4907 } 4908 sb.WriteByte('"') 4909 4910 keys = append(keys, sb.String()) 4911 sb.Reset() 4912 }) 4913 4914 return keys 4915 } 4916 4917 const matchInfo = ` 4918 match [options...] [regular expressions...] 4919 4920 Only keep lines which match any of the extended-mode regular expressions 4921 given. When not given any regex, match non-empty lines by default. 4922 4923 The options are, available both in single and double-dash versions 4924 4925 -h, -help show this help message 4926 -i, -ins match regexes case-insensitively 4927 -l, -links add a regex to match HTTP/HTTPS links case-insensitively 4928 ` 4929 4930 func matchMain() { 4931 nerr := 0 4932 buffered := false 4933 sensitive := true 4934 args := os.Args[1:] 4935 4936 out: 4937 for len(args) > 0 { 4938 switch args[0] { 4939 case `-b`, `--b`, `-buffered`, `--buffered`: 4940 buffered = true 4941 args = args[1:] 4942 4943 case `-i`, `--i`, `-ins`, `--ins`: 4944 sensitive = false 4945 args = args[1:] 4946 4947 case `-h`, `--h`, `-help`, `--help`: 4948 os.Stdout.WriteString(matchInfo[1:]) 4949 return 4950 4951 default: 4952 break out 4953 } 4954 } 4955 4956 if len(args) > 0 && args[0] == `--` { 4957 args = args[1:] 4958 } 4959 4960 liveLines := !buffered 4961 if !buffered { 4962 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 4963 liveLines = false 4964 } 4965 } 4966 4967 if len(args) == 0 { 
4968 args = []string{`.`} 4969 } 4970 4971 exprs := make([]*regexp.Regexp, 0, len(args)) 4972 4973 for _, src := range args { 4974 var err error 4975 var exp *regexp.Regexp 4976 if !sensitive { 4977 exp, err = regexp.Compile(`(?i)` + src) 4978 } else { 4979 exp, err = regexp.Compile(src) 4980 } 4981 4982 if err != nil { 4983 os.Stderr.WriteString(err.Error()) 4984 os.Stderr.WriteString("\n") 4985 nerr++ 4986 } 4987 4988 exprs = append(exprs, exp) 4989 } 4990 4991 if nerr > 0 { 4992 os.Exit(1) 4993 } 4994 4995 sc := bufio.NewScanner(os.Stdin) 4996 sc.Buffer(nil, maxLineBufSize) 4997 bw := bufio.NewWriter(os.Stdout) 4998 defer bw.Flush() 4999 5000 for i := 0; sc.Scan(); i++ { 5001 s := sc.Bytes() 5002 if i == 0 && hasPrefixBOM(s) { 5003 s = s[3:] 5004 } 5005 5006 if match(s, exprs) { 5007 bw.Write(s) 5008 bw.WriteByte('\n') 5009 5010 if !liveLines { 5011 continue 5012 } 5013 5014 if err := bw.Flush(); err != nil { 5015 return 5016 } 5017 } 5018 } 5019 } 5020 5021 const ncolInfo = ` 5022 ncol [options...] [filenames...] 5023 5024 Nice COLumns realigns and styles data tables using ANSI color sequences. In 5025 particular, all auto-detected numbers are styled so they're easier to read 5026 at a glance. Input tables can be either lines of space-separated values or 5027 tab-separated values, and are auto-detected using the first non-empty line. 5028 5029 When not given filepaths to read data from, this tool reads from standard 5030 input by default. 
5031 5032 The options are, available both in single and double-dash versions 5033 5034 -h show this help message 5035 -help show this help message 5036 5037 -no-sums avoid showing a final row with column sums 5038 -unsummed avoid showing a final row with column sums 5039 5040 -no-tiles avoid showing color-coded tiles at the start of lines 5041 -untiled avoid showing color-coded tiles at the start of lines 5042 ` 5043 5044 const columnGap = 2 5045 5046 // altDigitStyle is used to make 4+ digit-runs easier to read 5047 const altDigitStyle = "\x1b[38;2;168;168;168m" 5048 5049 func ncolMain() { 5050 sums := true 5051 tiles := true 5052 args := os.Args[1:] 5053 5054 out: 5055 for len(args) > 0 { 5056 switch args[0] { 5057 case `-h`, `--h`, `-help`, `--help`: 5058 os.Stdout.WriteString(ncolInfo[1:]) 5059 return 5060 5061 case 5062 `-no-sums`, `--no-sums`, `-no-totals`, `--no-totals`, 5063 `-unsummed`, `--unsummed`, `-untotaled`, `--untotaled`, 5064 `-untotalled`, `--untotalled`: 5065 sums = false 5066 args = args[1:] 5067 continue 5068 5069 case `-no-tiles`, `--no-tiles`, `-untiled`, `--untiled`: 5070 tiles = false 5071 args = args[1:] 5072 continue 5073 5074 default: 5075 break out 5076 } 5077 } 5078 5079 if len(args) > 0 && args[0] == `--` { 5080 args = args[1:] 5081 } 5082 5083 var res table 5084 res.ShowTiles = tiles 5085 res.ShowSums = sums 5086 5087 if err := ncolRun(args, &res); err != nil { 5088 os.Stderr.WriteString(err.Error()) 5089 os.Stderr.WriteString("\n") 5090 os.Exit(1) 5091 } 5092 } 5093 5094 // table has all summary info gathered from the data, along with the row 5095 // themselves, stored as lines/strings 5096 type table struct { 5097 Columns int 5098 5099 Rows []string 5100 5101 MaxWidth []int 5102 5103 MaxDotDecimals []int 5104 5105 Numeric []int 5106 5107 Sums []float64 5108 5109 LoopItems func(line string, items int, t *table, f itemFunc) int 5110 5111 sb strings.Builder 5112 5113 ShowTiles bool 5114 5115 ShowSums bool 5116 } 5117 5118 type 
itemFunc func(i int, s string, t *table) 5119 5120 func ncolRun(paths []string, res *table) error { 5121 for _, p := range paths { 5122 if err := ncolHandleFile(res, p); err != nil { 5123 return err 5124 } 5125 } 5126 5127 if len(paths) == 0 { 5128 if err := ncolHandleReader(res, os.Stdin); err != nil { 5129 return err 5130 } 5131 } 5132 5133 bw := bufio.NewWriterSize(os.Stdout, bufSize) 5134 defer bw.Flush() 5135 ncol(bw, res) 5136 return nil 5137 } 5138 5139 func ncolHandleFile(res *table, path string) error { 5140 f, err := os.Open(path) 5141 if err != nil { 5142 // on windows, file-not-found error messages may mention `CreateFile`, 5143 // even when trying to open files in read-only mode 5144 return errors.New(`can't open file named ` + path) 5145 } 5146 defer f.Close() 5147 return ncolHandleReader(res, f) 5148 } 5149 5150 func ncolHandleReader(t *table, r io.Reader) error { 5151 sc := bufio.NewScanner(r) 5152 sc.Buffer(nil, maxLineBufSize) 5153 5154 for i := 0; sc.Scan(); i++ { 5155 s := sc.Text() 5156 if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { 5157 s = s[3:] 5158 } 5159 5160 if len(s) == 0 { 5161 continue 5162 } 5163 5164 t.Rows = append(t.Rows, s) 5165 5166 if t.Columns == 0 { 5167 if t.LoopItems == nil { 5168 if strings.IndexByte(s, '\t') >= 0 { 5169 t.LoopItems = loopItemsTSV 5170 } else { 5171 t.LoopItems = loopItemsSSV 5172 } 5173 } 5174 5175 const maxInt = int(^uint(0) >> 1) 5176 t.Columns = t.LoopItems(s, maxInt, t, doNothing) 5177 } 5178 5179 t.LoopItems(s, t.Columns, t, ncolUpdateItem) 5180 } 5181 5182 return sc.Err() 5183 } 5184 5185 // doNothing is given to LoopItems to count items, while doing nothing else 5186 func doNothing(i int, s string, t *table) { 5187 } 5188 5189 func ncolUpdateItem(i int, s string, t *table) { 5190 // ensure column-info-slices have enough room 5191 if i >= len(t.MaxWidth) { 5192 t.MaxWidth = append(t.MaxWidth, 0) 5193 t.MaxDotDecimals = append(t.MaxDotDecimals, 0) 5194 t.Numeric = append(t.Numeric, 0) 5195 t.Sums 
= append(t.Sums, 0) 5196 } 5197 5198 // keep track of widest rune-counts for each column 5199 w := countWidth(s) 5200 if t.MaxWidth[i] < w { 5201 t.MaxWidth[i] = w 5202 } 5203 5204 // update stats for numeric items 5205 if checkNumeric(s, &(t.sb)) { 5206 dd := countDotDecimals(s) 5207 if t.MaxDotDecimals[i] < dd { 5208 t.MaxDotDecimals[i] = dd 5209 } 5210 5211 t.Numeric[i]++ 5212 f, _ := strconv.ParseFloat(t.sb.String(), 64) 5213 t.Sums[i] += f 5214 } 5215 } 5216 5217 // loopItemsSSV loops over a line's items, allocation-free style; when given 5218 // empty strings, the callback func is never called 5219 func loopItemsSSV(s string, max int, t *table, f itemFunc) int { 5220 i := 0 5221 s = trimTrailingSpaces(s) 5222 5223 for { 5224 s = trimLeadingSpaces(s) 5225 if len(s) == 0 { 5226 return i 5227 } 5228 5229 if i+1 == max { 5230 f(i, s, t) 5231 return i + 1 5232 } 5233 5234 j := strings.IndexByte(s, ' ') 5235 if j < 0 { 5236 f(i, s, t) 5237 return i + 1 5238 } 5239 5240 f(i, s[:j], t) 5241 s = s[j+1:] 5242 i++ 5243 } 5244 } 5245 5246 // loopItemsTSV loops over a line's tab-separated items, allocation-free style; 5247 // when given empty strings, the callback func is never called 5248 func loopItemsTSV(s string, max int, t *table, f itemFunc) int { 5249 if len(s) == 0 { 5250 return 0 5251 } 5252 5253 i := 0 5254 5255 for { 5256 if i+1 == max { 5257 f(i, s, t) 5258 return i + 1 5259 } 5260 5261 j := strings.IndexByte(s, '\t') 5262 if j < 0 { 5263 f(i, s, t) 5264 return i + 1 5265 } 5266 5267 f(i, s[:j], t) 5268 s = s[j+1:] 5269 i++ 5270 } 5271 } 5272 5273 // checkNumeric checks if a string is valid/useable as a number 5274 func checkNumeric(s string, sb *strings.Builder) bool { 5275 if len(s) == 0 { 5276 return false 5277 } 5278 5279 sb.Reset() 5280 5281 s = skipLeadingEscapeSequences(s) 5282 if len(s) > 0 && (s[0] == '+' || s[0] == '-') { 5283 sb.WriteByte(s[0]) 5284 s = s[1:] 5285 } 5286 5287 s = skipLeadingEscapeSequences(s) 5288 if len(s) == 0 { 5289 return false 
5290 } 5291 if b := s[0]; b == '.' { 5292 sb.WriteByte(b) 5293 return checkDigits(s[1:], sb) 5294 } 5295 5296 digits := 0 5297 5298 for { 5299 s = skipLeadingEscapeSequences(s) 5300 if len(s) == 0 { 5301 break 5302 } 5303 5304 b := s[0] 5305 sb.WriteByte(b) 5306 5307 if b == '.' { 5308 return checkDigits(s[1:], sb) 5309 } 5310 5311 if !('0' <= b && b <= '9') { 5312 return false 5313 } 5314 5315 digits++ 5316 s = s[1:] 5317 } 5318 5319 s = skipLeadingEscapeSequences(s) 5320 return len(s) == 0 && digits > 0 5321 } 5322 5323 func checkDigits(s string, sb *strings.Builder) bool { 5324 if len(s) == 0 { 5325 return false 5326 } 5327 5328 digits := 0 5329 5330 for { 5331 s = skipLeadingEscapeSequences(s) 5332 if len(s) == 0 { 5333 break 5334 } 5335 5336 if b := s[0]; '0' <= b && b <= '9' { 5337 sb.WriteByte(b) 5338 s = s[1:] 5339 digits++ 5340 } else { 5341 return false 5342 } 5343 } 5344 5345 s = skipLeadingEscapeSequences(s) 5346 return len(s) == 0 && digits > 0 5347 } 5348 5349 func ncol(w *bufio.Writer, t *table) { 5350 // make sums row first, as final alignments are usually affected by these 5351 var sums []string 5352 if t.ShowSums { 5353 sums = make([]string, 0, t.Columns) 5354 } 5355 5356 for i := 0; i < t.Columns && t.ShowSums; i++ { 5357 s := `-` 5358 width := 1 5359 5360 if t.Numeric[i] > 0 { 5361 decs := t.MaxDotDecimals[i] 5362 if decs > 0 { 5363 decs-- 5364 } 5365 5366 var buf [64]byte 5367 s = string(strconv.AppendFloat(buf[:0], t.Sums[i], 'f', decs, 64)) 5368 width = len(s) 5369 } 5370 5371 if t.MaxWidth[i] < width { 5372 t.MaxWidth[i] = width 5373 } 5374 sums = append(sums, s) 5375 } 5376 5377 due := 0 5378 showItem := func(i int, s string, t *table) { 5379 if i > 0 { 5380 due += columnGap 5381 } 5382 5383 if checkNumeric(s, &(t.sb)) { 5384 dd := countDotDecimals(s) 5385 rpad := t.MaxDotDecimals[i] - dd 5386 width := countWidth(s) 5387 lpad := t.MaxWidth[i] - (width + rpad) + due 5388 writeSpaces(w, lpad) 5389 f, _ := strconv.ParseFloat(t.sb.String(), 64) 
5390 writeNumericItem(w, s, numericStyle(f)) 5391 due = rpad 5392 return 5393 } 5394 5395 writeSpaces(w, due) 5396 w.WriteString(s) 5397 due = t.MaxWidth[i] - countWidth(s) 5398 } 5399 5400 writeTile := func(i int, s string, t *table) { 5401 if len(s) == 0 { 5402 w.WriteString("\x1b[0m○") 5403 return 5404 } 5405 5406 if checkNumeric(s, &(t.sb)) { 5407 f, _ := strconv.ParseFloat(t.sb.String(), 64) 5408 w.WriteString(numericStyle(f)) 5409 w.WriteString("■") 5410 return 5411 } 5412 5413 if s[0] == ' ' || s[len(s)-1] == ' ' { 5414 w.WriteString("\x1b[38;2;196;160;0m■") 5415 return 5416 } 5417 5418 w.WriteString("\x1b[38;2;128;128;128m■") 5419 } 5420 5421 // show realigned rows 5422 5423 for _, line := range t.Rows { 5424 due = 0 5425 if t.ShowTiles { 5426 end := t.LoopItems(line, t.Columns, t, writeTile) 5427 if end < len(t.MaxWidth)-1 { 5428 w.WriteString("\x1b[0m") 5429 } 5430 for i := end; i < len(t.MaxWidth); i++ { 5431 w.WriteString("×") 5432 } 5433 w.WriteString("\x1b[0m") 5434 due += columnGap 5435 } 5436 5437 t.LoopItems(line, t.Columns, t, showItem) 5438 if w.WriteByte('\n') != nil { 5439 return 5440 } 5441 } 5442 5443 if t.Columns > 0 && t.ShowSums { 5444 realignSums(w, t, sums) 5445 } 5446 } 5447 5448 func realignSums(w *bufio.Writer, t *table, sums []string) { 5449 due := 0 5450 if t.ShowTiles { 5451 due += t.Columns + columnGap 5452 } 5453 5454 for i, s := range sums { 5455 if i > 0 { 5456 due += columnGap 5457 } 5458 5459 if t.Numeric[i] == 0 { 5460 writeSpaces(w, due) 5461 w.WriteString(s) 5462 due = t.MaxWidth[i] - countWidth(s) 5463 continue 5464 } 5465 5466 lpad := t.MaxWidth[i] - len(s) + due 5467 writeSpaces(w, lpad) 5468 writeNumericItem(w, s, numericStyle(t.Sums[i])) 5469 due = 0 5470 } 5471 5472 w.WriteByte('\n') 5473 } 5474 5475 func writeRowTiles(w *bufio.Writer, s string, t *table, writeTile itemFunc) { 5476 end := t.LoopItems(s, t.Columns, t, writeTile) 5477 5478 if end < len(t.MaxWidth)-1 { 5479 w.WriteString("\x1b[0m") 5480 } 5481 for i := 
// numericStyle picks the ANSI color-code to use for the number given: there
// are separate greens for positive integers and positive non-integers,
// separate reds for negative integers and negative non-integers, a blue for
// zero, and a gray for anything else, which means NaN
func numericStyle(f float64) string {
	const (
		positiveWhole = "\x1b[38;2;0;135;0m"
		positiveOther = "\x1b[38;2;0;155;95m"
		negativeWhole = "\x1b[38;2;204;0;0m"
		negativeOther = "\x1b[38;2;215;95;95m"
		zero          = "\x1b[38;2;0;95;215m"
		fallback      = "\x1b[38;2;128;128;128m"
	)

	// whole is true when the value survives a round-trip through int64
	whole := f == float64(int64(f))

	switch {
	case f > 0 && whole:
		return positiveWhole
	case f > 0:
		return positiveOther
	case f < 0 && whole:
		return negativeWhole
	case f < 0:
		return negativeOther
	case f == 0:
		return zero
	default:
		return fallback
	}
}
unless there were no leading digits 5555 style := i != 0 5556 5557 for len(digits) > 0 { 5558 if style { 5559 w.WriteString(altStyle) 5560 w.WriteString(digits[:3]) 5561 w.WriteString("\x1b[0m") 5562 } else { 5563 w.WriteString(digits[:3]) 5564 } 5565 5566 // advance to the next triple: the start of this func is supposed 5567 // to guarantee this step always works 5568 digits = digits[3:] 5569 5570 // alternate between styled and unstyled 3-digit groups 5571 style = !style 5572 } 5573 } 5574 5575 const njsonInfo = ` 5576 njson [filepath...] 5577 5578 Nice JSON shows JSON data as ANSI-styled indented lines, using 2 spaces for 5579 each indentation level. 5580 ` 5581 5582 // indent is how many spaces each indentation level uses 5583 const njsonIndent = 2 5584 5585 const ( 5586 // njsonBoolStyle is bluish, and very distinct from all other colors used 5587 njsonBoolStyle = "\x1b[38;2;95;175;215m" 5588 5589 // njsonKeyStyle is magenta, and very distinct from normal strings 5590 njsonKeyStyle = "\x1b[38;2;135;95;255m" 5591 5592 // njsonNullStyle is a light-gray, just like syntax elements, but the word 5593 // `null` is wide enough to stand out from syntax items at a glance 5594 njsonNullStyle = njsonSyntaxStyle 5595 5596 // njsonPositiveNumberStyle is a nice green 5597 njsonPositiveNumberStyle = "\x1b[38;2;0;135;95m" 5598 5599 // njsonNegativeNumberStyle is a nice red 5600 njsonNegativeNumberStyle = "\x1b[38;2;204;0;0m" 5601 5602 // njsonZeroNumberStyle is a nice blue 5603 njsonZeroNumberStyle = "\x1b[38;2;0;95;215m" 5604 5605 // njsonStringStyle used to be bluish, but it's better to keep it plain, 5606 // which also minimizes how many different colors the output can show 5607 njsonStringStyle = `` 5608 5609 // njsonSyntaxStyle is a light-gray, not too light, not too dark 5610 njsonSyntaxStyle = "\x1b[38;2;168;168;168m" 5611 ) 5612 5613 func njsonMain() { 5614 args := os.Args[1:] 5615 5616 if len(args) > 0 { 5617 switch args[0] { 5618 case `-h`, `--h`, `-help`, `--help`: 
5619 os.Stdout.WriteString(himaInfo[1:]) 5620 return 5621 } 5622 } 5623 5624 if len(args) > 0 && args[0] == `--` { 5625 args = args[1:] 5626 } 5627 5628 if len(args) > 1 { 5629 handleError(nil, errors.New(`multiple inputs not allowed`)) 5630 return 5631 } 5632 5633 // figure out whether input should come from a named file or from stdin 5634 var paths [1]string 5635 paths[0] = `-` 5636 if len(args) > 0 { 5637 paths[0] = args[0] 5638 } 5639 5640 easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error { 5641 return njson(w, r) 5642 }) 5643 } 5644 5645 func njson(w *bufio.Writer, r io.Reader) error { 5646 dec := json.NewDecoder(r) 5647 // using string-like json.Number values instead of float64 ones avoids 5648 // unneeded reformatting of numbers; reformatting parsed float64 values 5649 // can potentially even drop/change decimals, causing the output not to 5650 // match the input digits exactly, which is best to avoid 5651 dec.UseNumber() 5652 5653 t, err := dec.Token() 5654 if err == io.EOF { 5655 return errors.New(`empty input isn't valid JSON`) 5656 } 5657 if err != nil { 5658 return err 5659 } 5660 5661 if err := njsonHandleToken(w, dec, t, 0, 0); err != nil { 5662 return err 5663 } 5664 // don't forget to end the last output line 5665 w.WriteByte('\n') 5666 5667 if _, err := dec.Token(); err != io.EOF { 5668 return errors.New(`unexpected trailing JSON data`) 5669 } 5670 return nil 5671 } 5672 5673 func njsonHandleToken(w *bufio.Writer, d *json.Decoder, t json.Token, pre, level int) error { 5674 switch t := t.(type) { 5675 case json.Delim: 5676 switch t { 5677 case json.Delim('['): 5678 return njsonHandleArray(w, d, pre, level) 5679 5680 case json.Delim('{'): 5681 return njsonHandleObject(w, d, pre, level) 5682 5683 default: 5684 // return fmt.Errorf(`unsupported JSON delimiter %v`, t) 5685 return errors.New(`unsupported JSON delimiter`) 5686 } 5687 5688 case nil: 5689 writeSpaces(w, njsonIndent*pre) 5690 w.WriteString(njsonNullStyle + 
"null\x1b[0m") 5691 return nil 5692 5693 case bool: 5694 writeSpaces(w, njsonIndent*pre) 5695 if t { 5696 w.WriteString(njsonBoolStyle + "true\x1b[0m") 5697 } else { 5698 w.WriteString(njsonBoolStyle + "false\x1b[0m") 5699 } 5700 return nil 5701 5702 case string: 5703 return njsonHandleString(w, t, pre) 5704 5705 case json.Number: 5706 return njsonHandleNumber(w, t, pre) 5707 5708 default: 5709 // return fmt.Errorf(`unsupported token type %T`, t) 5710 return errors.New(`unsupported token type`) 5711 } 5712 } 5713 5714 func njsonHandleArray(w *bufio.Writer, d *json.Decoder, pre, level int) error { 5715 for i := 0; true; i++ { 5716 t, err := d.Token() 5717 if err != nil { 5718 return err 5719 } 5720 5721 if t == json.Delim(']') { 5722 if i == 0 { 5723 writeSpaces(w, njsonIndent*pre) 5724 w.WriteString(njsonSyntaxStyle + "[]\x1b[0m") 5725 } else { 5726 w.WriteString("\n") 5727 writeSpaces(w, njsonIndent*level) 5728 w.WriteString(njsonSyntaxStyle + "]\x1b[0m") 5729 } 5730 return nil 5731 } 5732 5733 if i == 0 { 5734 writeSpaces(w, njsonIndent*pre) 5735 w.WriteString(njsonSyntaxStyle + "[\x1b[0m\n") 5736 } else { 5737 // this is a good spot to check for early-quit opportunities 5738 w.WriteString(njsonSyntaxStyle + ",\x1b[0m\n") 5739 if err := w.Flush(); err != nil { 5740 // a write error may be the consequence of stdout being closed, 5741 // perhaps by another app along a pipe 5742 return errNoMoreOutput 5743 } 5744 } 5745 5746 if err := njsonHandleToken(w, d, t, level+1, level+1); err != nil { 5747 return err 5748 } 5749 } 5750 5751 // make the compiler happy 5752 return nil 5753 } 5754 5755 func njsonHandleKey(w *bufio.Writer, s string, pre int) error { 5756 writeSpaces(w, njsonIndent*pre) 5757 w.WriteString(njsonSyntaxStyle + "\"\x1b[0m" + njsonKeyStyle) 5758 w.WriteString(s) 5759 w.WriteString(njsonSyntaxStyle + "\":\x1b[0m ") 5760 return nil 5761 } 5762 5763 func njsonHandleNumber(w *bufio.Writer, n json.Number, pre int) error { 5764 writeSpaces(w, 
njsonIndent*pre) 5765 f, _ := n.Float64() 5766 if f > 0 { 5767 w.WriteString(njsonPositiveNumberStyle) 5768 } else if f < 0 { 5769 w.WriteString(njsonNegativeNumberStyle) 5770 } else { 5771 w.WriteString(njsonZeroNumberStyle) 5772 } 5773 w.WriteString(n.String()) 5774 w.WriteString("\x1b[0m") 5775 return nil 5776 } 5777 5778 func njsonHandleObject(w *bufio.Writer, d *json.Decoder, pre, level int) error { 5779 for i := 0; true; i++ { 5780 t, err := d.Token() 5781 if err != nil { 5782 return err 5783 } 5784 5785 if t == json.Delim('}') { 5786 if i == 0 { 5787 writeSpaces(w, njsonIndent*pre) 5788 w.WriteString(njsonSyntaxStyle + "{}\x1b[0m") 5789 } else { 5790 w.WriteString("\n") 5791 writeSpaces(w, njsonIndent*level) 5792 w.WriteString(njsonSyntaxStyle + "}\x1b[0m") 5793 } 5794 return nil 5795 } 5796 5797 if i == 0 { 5798 writeSpaces(w, njsonIndent*pre) 5799 w.WriteString(njsonSyntaxStyle + "{\x1b[0m\n") 5800 } else { 5801 // this is a good spot to check for early-quit opportunities 5802 w.WriteString(njsonSyntaxStyle + ",\x1b[0m\n") 5803 if err := w.Flush(); err != nil { 5804 // a write error may be the consequence of stdout being closed, 5805 // perhaps by another app along a pipe 5806 return errNoMoreOutput 5807 } 5808 } 5809 5810 // the stdlib's JSON parser is supposed to complain about non-string 5811 // keys anyway, but make sure just in case 5812 k, ok := t.(string) 5813 if !ok { 5814 return errors.New(`expected key to be a string`) 5815 } 5816 if err := njsonHandleKey(w, k, level+1); err != nil { 5817 return err 5818 } 5819 5820 // handle value 5821 t, err = d.Token() 5822 if err != nil { 5823 return err 5824 } 5825 if err := njsonHandleToken(w, d, t, 0, level+1); err != nil { 5826 return err 5827 } 5828 } 5829 5830 // make the compiler happy 5831 return nil 5832 } 5833 5834 func njsonHandleString(w *bufio.Writer, s string, pre int) error { 5835 writeSpaces(w, njsonIndent*pre) 5836 w.WriteString(njsonSyntaxStyle + "\"\x1b[0m" + njsonStringStyle) 5837 for i := 
range s { 5838 w.Write(escapedStringBytes[s[i]]) 5839 } 5840 w.WriteString(njsonSyntaxStyle + "\"\x1b[0m") 5841 return nil 5842 } 5843 5844 const nnInfo = ` 5845 nn [options...] [file...] 5846 5847 5848 Nice Numbers is an app which renders the UTF-8 text it's given to make long 5849 numbers much easier to read. It does so by alternating 3-digit groups which 5850 are colored using ANSI-codes with plain/unstyled 3-digit groups. 5851 5852 Unlike the common practice of inserting commas between 3-digit groups, this 5853 trick doesn't widen the original text, keeping alignments across lines the 5854 same. 5855 5856 Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line 5857 feeds. 5858 5859 All (optional) leading options start with either single or double-dash, 5860 and most of them change the style/color used. Some of the options are, 5861 shown in their single-dash form: 5862 5863 -h show this help message 5864 -help show this help message 5865 5866 -b use a blue color 5867 -blue use a blue color 5868 -bold bold-style digits 5869 -g use a green color 5870 -gray use a gray color (default) 5871 -green use a green color 5872 -hi use a highlighting/inverse style 5873 -m use a magenta color 5874 -magenta use a magenta color 5875 -o use an orange color 5876 -orange use an orange color 5877 -r use a red color 5878 -red use a red color 5879 -u underline digits 5880 -underline underline digits 5881 ` 5882 5883 func nnMain() { 5884 args := os.Args[1:] 5885 5886 if len(args) > 0 { 5887 switch args[0] { 5888 case `-h`, `--h`, `-help`, `--help`: 5889 os.Stdout.WriteString(nnInfo[1:]) 5890 return 5891 } 5892 } 5893 5894 options := true 5895 if len(args) > 0 && args[0] == `--` { 5896 options = false 5897 args = args[1:] 5898 } 5899 5900 style, _ := lookupStyle(`gray`) 5901 5902 // if the first argument is 1 or 2 dashes followed by a supported 5903 // style-name, change the style used 5904 if options && len(args) > 0 && strings.HasPrefix(args[0], `-`) { 5905 name 
:= args[0] 5906 name = strings.TrimPrefix(name, `-`) 5907 name = strings.TrimPrefix(name, `-`) 5908 args = args[1:] 5909 5910 // check if the `dedashed` argument is a supported style-name 5911 if s, ok := lookupStyle(name); ok { 5912 style = s 5913 } else { 5914 os.Stderr.WriteString(`invalid style name `) 5915 os.Stderr.WriteString(name) 5916 os.Stderr.WriteString("\n") 5917 os.Exit(1) 5918 } 5919 } 5920 5921 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 5922 return restyle(w, r, style) 5923 }) 5924 } 5925 5926 func restyle(w *bufio.Writer, r io.Reader, style string) error { 5927 sc := bufio.NewScanner(r) 5928 sc.Buffer(nil, maxLineBufSize) 5929 5930 for i := 0; sc.Scan(); i++ { 5931 s := sc.Text() 5932 if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { 5933 s = s[3:] 5934 } 5935 5936 restyleLine(w, s, style) 5937 w.WriteByte('\n') 5938 if err := w.Flush(); err != nil { 5939 // a write error may be the consequence of stdout being closed, 5940 // perhaps by another app along a pipe 5941 return errNoMoreOutput 5942 } 5943 } 5944 return sc.Err() 5945 } 5946 5947 func lookupStyle(name string) (style string, ok bool) { 5948 if alias, ok := styleAliases[name]; ok { 5949 name = alias 5950 } 5951 5952 style, ok = styles[name] 5953 return style, ok 5954 } 5955 5956 // restyleLine renders the line given, using ANSI-styles to make any long 5957 // numbers in it more legible; this func doesn't emit a line-feed, which 5958 // is up to its caller 5959 func restyleLine(w *bufio.Writer, line string, style string) { 5960 for len(line) > 0 { 5961 i := indexDigit(line) 5962 if i < 0 { 5963 // no (more) digits to style for sure 5964 w.WriteString(line) 5965 return 5966 } 5967 5968 // emit line before current digit-run 5969 w.WriteString(line[:i]) 5970 // advance to the start of the current digit-run 5971 line = line[i:] 5972 5973 // see where the digit-run ends 5974 j := indexNonDigit(line) 5975 if j < 0 { 5976 // the digit-run goes until the end 5977 
restyleDigits(w, line, style) 5978 return 5979 } 5980 5981 // emit styled digit-run 5982 restyleDigits(w, line[:j], style) 5983 // skip right past the end of the digit-run 5984 line = line[j:] 5985 } 5986 } 5987 5988 const plainInfo = ` 5989 plain [options...] [file...] 5990 5991 5992 Turn potentially ANSI-styled plain-text into actual plain-text. 5993 5994 Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line 5995 feeds. 5996 5997 All (optional) leading options start with either single or double-dash: 5998 5999 -h show this help message 6000 -help show this help message 6001 ` 6002 6003 func plainMain() { 6004 buffered := false 6005 args := os.Args[1:] 6006 6007 if len(args) > 0 { 6008 switch args[0] { 6009 case `-b`, `--b`, `-buffered`, `--buffered`: 6010 buffered = true 6011 args = args[1:] 6012 6013 case `-h`, `--h`, `-help`, `--help`: 6014 os.Stdout.WriteString(plainInfo[1:]) 6015 return 6016 } 6017 } 6018 6019 if len(args) > 0 && args[0] == `--` { 6020 args = args[1:] 6021 } 6022 6023 liveLines := !buffered 6024 if !buffered { 6025 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 6026 liveLines = false 6027 } 6028 } 6029 6030 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 6031 return plain(w, r, liveLines) 6032 }) 6033 } 6034 6035 func plain(w *bufio.Writer, r io.Reader, live bool) error { 6036 sc := bufio.NewScanner(r) 6037 sc.Buffer(nil, maxLineBufSize) 6038 6039 for i := 0; sc.Scan(); i++ { 6040 s := sc.Bytes() 6041 if i == 0 && hasPrefixBOM(s) { 6042 s = s[3:] 6043 } 6044 6045 for line := s; len(line) > 0; { 6046 i, j := indexEscapeSequence(line) 6047 if i < 0 { 6048 w.Write(line) 6049 break 6050 } 6051 if j < 0 { 6052 j = len(line) 6053 } 6054 6055 if i > 0 { 6056 w.Write(line[:i]) 6057 } 6058 6059 line = line[j:] 6060 } 6061 6062 if w.WriteByte('\n') != nil { 6063 return errNoMoreOutput 6064 } 6065 6066 if !live { 6067 continue 6068 } 6069 6070 if err := w.Flush(); err != nil { 6071 return 
// oddPrime checks whether an odd number is prime, by trying all odd divisors
// from 3 up to the number's square root; even divisors are never tried, so
// results are only meaningful for odd inputs
func oddPrime(n uint64) bool {
	limit := uint64(math.Sqrt(float64(n)))
	div := uint64(3)

	for div <= limit {
		if n%div == 0 {
			return false
		}
		div += 2
	}

	return true
}
6157 6158 The only option available is to show this help message, using any of 6159 "-h", "--h", "-help", or "--help", without the quotes. 6160 ` 6161 6162 func realignMain() { 6163 args := os.Args[1:] 6164 6165 if len(args) > 0 { 6166 switch args[0] { 6167 case `-h`, `--h`, `-help`, `--help`: 6168 os.Stdout.WriteString(realignInfo[1:]) 6169 return 6170 6171 case `--`: 6172 args = args[1:] 6173 } 6174 } 6175 6176 if err := realignRun(args); err != nil { 6177 os.Stderr.WriteString(err.Error()) 6178 os.Stderr.WriteString("\n") 6179 os.Exit(1) 6180 } 6181 } 6182 6183 func realignRun(paths []string) error { 6184 var res table 6185 6186 for _, p := range paths { 6187 if err := realignHandleFile(&res, p); err != nil { 6188 return err 6189 } 6190 } 6191 6192 if len(paths) == 0 { 6193 if err := realignHandleReader(&res, os.Stdin); err != nil { 6194 return err 6195 } 6196 } 6197 6198 bw := bufio.NewWriterSize(os.Stdout, bufSize) 6199 defer bw.Flush() 6200 realign(bw, res) 6201 return nil 6202 } 6203 6204 func realignHandleFile(res *table, path string) error { 6205 f, err := os.Open(path) 6206 if err != nil { 6207 // on windows, file-not-found error messages may mention `CreateFile`, 6208 // even when trying to open files in read-only mode 6209 return errors.New(`can't open file named ` + path) 6210 } 6211 defer f.Close() 6212 return realignHandleReader(res, f) 6213 } 6214 6215 func realignHandleReader(t *table, r io.Reader) error { 6216 sc := bufio.NewScanner(r) 6217 sc.Buffer(nil, maxLineBufSize) 6218 6219 for i := 0; sc.Scan(); i++ { 6220 s := sc.Text() 6221 if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { 6222 s = s[3:] 6223 } 6224 6225 if len(s) == 0 { 6226 if len(t.Rows) > 0 { 6227 t.Rows = append(t.Rows, ``) 6228 } 6229 continue 6230 } 6231 6232 t.Rows = append(t.Rows, s) 6233 6234 if t.Columns == 0 { 6235 if t.LoopItems == nil { 6236 if strings.IndexByte(s, '\t') >= 0 { 6237 t.LoopItems = loopItemsTSV 6238 } else { 6239 t.LoopItems = loopItemsSSV 6240 } 6241 } 
6242 6243 const maxInt = int(^uint(0) >> 1) 6244 t.LoopItems(s, maxInt, t, updateColumnCount) 6245 } 6246 6247 t.LoopItems(s, t.Columns, t, realignUpdateItem) 6248 } 6249 6250 return sc.Err() 6251 } 6252 6253 func updateColumnCount(i int, s string, t *table) { 6254 t.Columns = i + 1 6255 } 6256 6257 func realignUpdateItem(i int, s string, t *table) { 6258 // ensure column-info-slices have enough room 6259 if i >= len(t.MaxWidth) { 6260 t.MaxWidth = append(t.MaxWidth, 0) 6261 t.MaxDotDecimals = append(t.MaxDotDecimals, 0) 6262 } 6263 6264 // keep track of widest rune-counts for each column 6265 w := countWidth(s) 6266 if t.MaxWidth[i] < w { 6267 t.MaxWidth[i] = w 6268 } 6269 6270 // update stats for numeric items 6271 if isNumeric(s) { 6272 dd := countDotDecimals(s) 6273 if t.MaxDotDecimals[i] < dd { 6274 t.MaxDotDecimals[i] = dd 6275 } 6276 } 6277 } 6278 6279 // isNumeric checks if a string is valid/useable as a number 6280 func isNumeric(s string) bool { 6281 if len(s) == 0 { 6282 return false 6283 } 6284 6285 s = skipLeadingEscapeSequences(s) 6286 if len(s) > 0 && (s[0] == '+' || s[0] == '-') { 6287 s = s[1:] 6288 } 6289 6290 s = skipLeadingEscapeSequences(s) 6291 if len(s) == 0 { 6292 return false 6293 } 6294 if s[0] == '.' { 6295 return isDigits(s[1:]) 6296 } 6297 6298 digits := 0 6299 6300 for { 6301 s = skipLeadingEscapeSequences(s) 6302 if len(s) == 0 { 6303 break 6304 } 6305 6306 if s[0] == '.' 
{ 6307 return isDigits(s[1:]) 6308 } 6309 6310 if !('0' <= s[0] && s[0] <= '9') { 6311 return false 6312 } 6313 6314 digits++ 6315 s = s[1:] 6316 } 6317 6318 s = skipLeadingEscapeSequences(s) 6319 return len(s) == 0 && digits > 0 6320 } 6321 6322 func isDigits(s string) bool { 6323 if len(s) == 0 { 6324 return false 6325 } 6326 6327 digits := 0 6328 6329 for { 6330 s = skipLeadingEscapeSequences(s) 6331 if len(s) == 0 { 6332 break 6333 } 6334 6335 if '0' <= s[0] && s[0] <= '9' { 6336 s = s[1:] 6337 digits++ 6338 } else { 6339 return false 6340 } 6341 } 6342 6343 s = skipLeadingEscapeSequences(s) 6344 return len(s) == 0 && digits > 0 6345 } 6346 6347 func realign(w *bufio.Writer, t table) { 6348 due := 0 6349 showItem := func(i int, s string, t *table) { 6350 if i > 0 { 6351 due += 2 6352 } 6353 6354 if isNumeric(s) { 6355 dd := countDotDecimals(s) 6356 rpad := t.MaxDotDecimals[i] - dd 6357 width := countWidth(s) 6358 lpad := t.MaxWidth[i] - (width + rpad) + due 6359 writeSpaces(w, lpad) 6360 w.WriteString(s) 6361 due = rpad 6362 return 6363 } 6364 6365 writeSpaces(w, due) 6366 w.WriteString(s) 6367 due = t.MaxWidth[i] - countWidth(s) 6368 } 6369 6370 for _, line := range t.Rows { 6371 due = 0 6372 if len(line) > 0 { 6373 t.LoopItems(line, t.Columns, &t, showItem) 6374 } 6375 if w.WriteByte('\n') != nil { 6376 break 6377 } 6378 } 6379 } 6380 6381 const squeezeInfo = ` 6382 squeeze [filenames...] 6383 6384 Ignore leading/trailing spaces (and carriage-returns) on lines, also turning 6385 all runs of multiple consecutive spaces into single spaces. Spaces around 6386 tabs are ignored as well. 
// writeSqueezed emits the line given, ignoring its leading/trailing spaces,
// turning runs of spaces between other bytes into single spaces, and ignoring
// any spaces right before/after tabs; no line-feed is emitted, which is up to
// the caller
func writeSqueezed(w *bufio.Writer, s []byte) {
	// ignore leading spaces
	for len(s) > 0 && s[0] == ' ' {
		s = s[1:]
	}

	// ignore trailing spaces
	for len(s) > 0 && s[len(s)-1] == ' ' {
		s = s[:len(s)-1]
	}

	// space remembers when a run of spaces is still due as a single space:
	// that space is only emitted when a non-space non-tab byte follows
	space := false

	// advancing via the loop's post-statement guarantees all byte kinds move
	// the loop forward, so lines with regular bytes can't loop forever
	for i := 0; i < len(s); i++ {
		switch b := s[i]; b {
		case ' ':
			space = true

		case '\t':
			// spaces right before tabs are ignored
			space = false
			// spaces right after tabs are ignored as well
			for i+1 < len(s) && s[i+1] == ' ' {
				i++
			}
			w.WriteByte('\t')

		default:
			if space {
				w.WriteByte(' ')
				space = false
			}
			w.WriteByte(b)
		}
	}
}
6488 6489 6490 Title and Concatenate lines emits lines from all the named sources given, 6491 preceding each file's contents with its name, using an ANSI reverse style. 6492 6493 The name "-" stands for the standard input. When no names are given, the 6494 standard input is used by default. 6495 6496 All (optional) leading options start with either single or double-dash: 6497 6498 -h show this help message 6499 -help show this help message 6500 ` 6501 6502 func tcatlMain() { 6503 args := os.Args[1:] 6504 if len(args) > 0 { 6505 switch args[0] { 6506 case `-h`, `--h`, `-help`, `--help`: 6507 os.Stdout.WriteString(tcatlInfo[1:]) 6508 return 6509 } 6510 } 6511 6512 if len(args) > 0 && args[0] == `--` { 6513 args = args[1:] 6514 } 6515 6516 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 6517 fmt.Fprintf(w, "\x1b[7m%-80s\x1b[0m\n", name) 6518 return catl(w, r, false) 6519 }) 6520 } 6521 6522 const utfateInfo = ` 6523 utfate [options...] [file...] 6524 6525 This app turns plain-text input into UTF-8. 
Supported input formats are 6526 6527 - ASCII 6528 - UTF-8 6529 - UTF-8 with a leading BOM 6530 - UTF-16 BE 6531 - UTF-16 LE 6532 - UTF-32 BE 6533 - UTF-32 LE 6534 6535 All (optional) leading options start with either single or double-dash: 6536 6537 -h show this help message 6538 -help show this help message 6539 ` 6540 6541 func utfateMain() { 6542 args := os.Args[1:] 6543 6544 if len(args) > 0 { 6545 switch args[0] { 6546 case `-h`, `--h`, `-help`, `--help`: 6547 os.Stdout.WriteString(fixlinesInfo[1:]) 6548 return 6549 } 6550 } 6551 6552 if len(args) > 0 && args[0] == `--` { 6553 args = args[1:] 6554 } 6555 6556 if len(os.Args) > 1 { 6557 switch os.Args[1] { 6558 case `-h`, `--h`, `-help`, `--help`: 6559 os.Stdout.WriteString(utfateInfo[1:]) 6560 return 6561 } 6562 } 6563 6564 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { 6565 return utfate(w, r) 6566 }) 6567 } 6568 6569 func utfate(w io.Writer, r io.Reader) error { 6570 br := bufio.NewReaderSize(r, bufSize) 6571 bw := bufio.NewWriterSize(w, bufSize) 6572 defer bw.Flush() 6573 6574 lead, err := br.Peek(4) 6575 if err != nil && err != io.EOF { 6576 return err 6577 } 6578 6579 if bytes.HasPrefix(lead, []byte{'\x00', '\x00', '\xfe', '\xff'}) { 6580 br.Discard(4) 6581 return utf32toUTF8(bw, br, binary.BigEndian) 6582 } 6583 6584 if bytes.HasPrefix(lead, []byte{'\xff', '\xfe', '\x00', '\x00'}) { 6585 br.Discard(4) 6586 return utf32toUTF8(bw, br, binary.LittleEndian) 6587 } 6588 6589 if bytes.HasPrefix(lead, []byte{'\xfe', '\xff'}) { 6590 br.Discard(2) 6591 return utf16toUTF8(bw, br, readBytePairBE) 6592 } 6593 6594 if bytes.HasPrefix(lead, []byte{'\xff', '\xfe'}) { 6595 br.Discard(2) 6596 return utf16toUTF8(bw, br, readBytePairLE) 6597 } 6598 6599 if bytes.HasPrefix(lead, []byte{'\xef', '\xbb', '\xbf'}) { 6600 br.Discard(3) 6601 return handleUTF8(bw, br) 6602 } 6603 6604 return handleUTF8(bw, br) 6605 } 6606 6607 func handleUTF8(w *bufio.Writer, r *bufio.Reader) error { 6608 for { 6609 c, 
_, err := r.ReadRune() 6610 if c == unicode.ReplacementChar { 6611 return errors.New(`invalid UTF-8 stream`) 6612 } 6613 if err == io.EOF { 6614 return nil 6615 } 6616 if err != nil { 6617 return err 6618 } 6619 6620 if _, err := w.WriteRune(c); err != nil { 6621 return errNoMoreOutput 6622 } 6623 } 6624 } 6625 6626 // readPairFunc narrows source-code lines below 6627 type readPairFunc func(*bufio.Reader) (byte, byte, error) 6628 6629 // utf16toUTF8 handles UTF-16 inputs for func utfate 6630 func utf16toUTF8(w *bufio.Writer, r *bufio.Reader, read2 readPairFunc) error { 6631 for { 6632 a, b, err := read2(r) 6633 if err == io.EOF { 6634 return nil 6635 } 6636 if err != nil { 6637 return err 6638 } 6639 6640 c := rune(256*int(a) + int(b)) 6641 if utf16.IsSurrogate(c) { 6642 a, b, err := read2(r) 6643 if err == io.EOF { 6644 return nil 6645 } 6646 if err != nil { 6647 return err 6648 } 6649 6650 next := rune(256*int(a) + int(b)) 6651 c = utf16.DecodeRune(c, next) 6652 } 6653 6654 if _, err := w.WriteRune(c); err != nil { 6655 return errNoMoreOutput 6656 } 6657 } 6658 } 6659 6660 // readBytePairBE gets you a pair of bytes in big-endian (original) order 6661 func readBytePairBE(br *bufio.Reader) (byte, byte, error) { 6662 a, err := br.ReadByte() 6663 if err != nil { 6664 return a, 0, err 6665 } 6666 6667 b, err := br.ReadByte() 6668 return a, b, err 6669 } 6670 6671 // readBytePairLE gets you a pair of bytes in little-endian order 6672 func readBytePairLE(br *bufio.Reader) (byte, byte, error) { 6673 a, b, err := readBytePairBE(br) 6674 return b, a, err 6675 } 6676 6677 // utf32toUTF8 handles UTF-32 inputs for func utfate 6678 func utf32toUTF8(w *bufio.Writer, r *bufio.Reader, o binary.ByteOrder) error { 6679 var n uint32 6680 for { 6681 err := binary.Read(r, o, &n) 6682 if err == io.EOF { 6683 return nil 6684 } 6685 if err != nil { 6686 return err 6687 } 6688 6689 if _, err := w.WriteRune(rune(n)); err != nil { 6690 return errNoMoreOutput 6691 } 6692 } 6693 }