File: tu/bytes.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "compress/bzip2" 7 "compress/gzip" 8 "crypto/rand" 9 "encoding/base64" 10 "encoding/hex" 11 "hash" 12 "io" 13 "regexp" 14 "unicode/utf8" 15 ) 16 17 const linksExpr = `https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*` 18 19 var linksMatch = regexp.MustCompile(linksExpr) 20 21 // isSymbolASCII helps the `strings` tool do its job quickly 22 var isSymbolASCII = [256]bool{ 23 false, false, false, false, false, false, false, false, 24 false, true, false, false, false, false, false, false, 25 false, false, false, false, false, false, false, false, 26 false, false, false, false, false, false, false, false, 27 true, true, true, true, true, true, true, true, 28 true, true, true, true, true, true, true, true, 29 true, true, true, true, true, true, true, true, 30 true, true, true, true, true, true, true, true, 31 true, true, true, true, true, true, true, true, 32 true, true, true, true, true, true, true, true, 33 true, true, true, true, true, true, true, true, 34 true, true, true, true, true, true, true, true, 35 true, true, true, true, true, true, true, true, 36 true, true, true, true, true, true, true, true, 37 true, true, true, true, true, true, true, true, 38 true, true, true, true, true, true, true, false, 39 } 40 41 // uriUnescapedASCII marks which ASCII bytes don't need escaping 42 var uriUnescapedASCII = [256]bool{ 43 '0': true, '1': true, '2': true, '3': true, '4': true, 44 '5': true, '6': true, '7': true, '8': true, '9': true, 45 46 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 47 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 48 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 49 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 50 'Y': true, 'Z': true, 51 52 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 53 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 54 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 55 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 56 'y': true, 'z': true, 57 58 '-': true, '_': true, '.': true, '~': true, 59 '/': true, 60 } 61 62 // latin1 is a direct byte-translation table from Latin-1 bytes into UTF-8 63 // runes, used by func vulgarize 64 var latin1 = [256]rune{ 65 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 66 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 67 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 68 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 69 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 70 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 71 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 72 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 73 74 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 75 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 76 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 77 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 78 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 79 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 80 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 81 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 82 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 83 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 84 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 85 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 86 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, utf8.RuneError, 87 88 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 89 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 90 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 91 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 92 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 93 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 94 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 95 utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError, 96 97 ' ', '¡', '¢', '£', '¤', '¥', '¦', '§', 98 '¨', '©', 'ª', '«', '¬', '', '®', '¯', 99 '°', '±', '²', '³', '´', 'µ', '¶', '·', 100 '¸', '¹', 'º', '»', '¼', '½', '¾', '¿', 101 'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç', 102 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 103 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×', 104 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 105 'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç', 106 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 107 'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷', 108 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ', 109 } 110 111 func after(s, what []byte) []byte { 112 if i := bytes.Index(s, what); i >= 0 { 113 return s[i+len(what):] 114 } 115 return nil 116 } 117 118 func afterLast(s, what []byte) []byte { 119 if i := bytes.LastIndex(s, what); i >= 0 { 120 return s[i+len(what):] 121 } 122 return nil 123 } 124 125 func before(s, what []byte) []byte { 126 if i := bytes.Index(s, what); i >= 0 { 127 return s[:i] 128 } 129 return s 130 } 131 132 func beforeLast(s, what []byte) []byte { 133 if i := bytes.LastIndex(s, what); i >= 0 { 134 return s[:i] 135 } 136 return s 137 } 138 139 func bytesTool(w *bufio.Writer, r io.Reader, args []string) error { 140 return handleNamedInputs(args, r, func(path string, r io.Reader) error { 141 _, err := io.Copy(w, r) 142 return adaptWriteError(err) 143 }) 144 } 145 146 // blowTabsLine expands tabs, using the tab-stop count given; non-positive 147 // tab-stops cause it to ignore tabs altogether 148 func blowTabsLine(w *bufio.Writer, line []byte, tabStop int) { 149 if tabStop < 1 { 150 dropTabsLine(w, line) 151 return 152 } 153 154 n := 0 155 156 for _, b := range line { 157 if b == '\t' { 158 writeSpaces(w, tabStop-n%tabStop) 159 n = 0 160 continue 161 } 162 163 w.WriteByte(b) 164 n++ 165 } 166 } 167 168 // dropPunctuationLine ignores all common punctuation symbols 169 func dropPunctuationLine(w *bufio.Writer, line []byte) { 170 for len(line) > 0 { 171 i := bytes.IndexAny(line, `,.<>;:'"/?[]{}|\!@#$%^&*()-+=~`+"`") 172 if i < 0 { 173 w.Write(line) 174 break 175 } 176 177 w.Write(line[:i]) 178 line = line[i+1:] 179 } 180 } 181 182 // dropTabsLine ignores all tabs 183 func dropTabsLine(w *bufio.Writer, line []byte) { 184 for len(line) > 0 { 185 i := bytes.IndexByte(line, '\t') 186 if i < 0 { 187 w.Write(line) 188 break 189 } 190 191 w.Write(line[:i]) 192 line = line[i+1:] 193 } 194 } 195 196 // debase64 decodes base64-encoded text into its corresponding bytes 197 func debase64(w *bufio.Writer, r io.Reader) error { 198 r, err := trimStartDataURI(r) 199 if err != nil { 200 return err 201 } 202 203 dec := base64.NewDecoder(base64.StdEncoding, r) 204 return copyBytes(w, dec) 205 } 206 207 // trimStartDataURI is used by func debase64 208 func trimStartDataURI(r io.Reader) (io.Reader, error) { 209 var buf [64]byte 210 n, err := r.Read(buf[:]) 211 start := buf[:n] 212 213 if bytes.HasPrefix(start, []byte{'d', 'a', 't', 'a', ':'}) { 214 i := bytes.IndexByte(start, ',') 215 if i < 0 { 216 i = n - 1 217 } 218 start = start[i+1:] 219 } 220 221 if err == io.EOF { 222 return bytes.NewReader(start), nil 223 } 224 if err != nil { 225 return nil, err 226 } 227 228 return io.MultiReader(bytes.NewReader(start), r), nil 229 } 230 231 func debzip2(w *bufio.Writer, r io.Reader) error { 232 dec := bzip2.NewReader(r) 233 _, err := io.Copy(w, dec) 234 return adaptWriteError(err) 235 } 236 237 // dedent ignores up to n leading spaces in the byte-slice given 238 func dedent(s []byte, n int) []byte { 239 for len(s) > 0 && n > 0 && s[0] == ' ' { 240 s = s[1:] 241 n-- 242 } 243 return s 244 } 245 246 func degzip(w *bufio.Writer, r io.Reader) error { 247 dec, err := gzip.NewReader(r) 248 if err != nil { 249 return err 250 } 251 _, err = io.Copy(w, dec) 252 return adaptWriteError(err) 253 } 254 255 func gzipBytes(w *bufio.Writer, r io.Reader) error { 256 enc := gzip.NewWriter(w) 257 defer enc.Flush() 258 _, err := io.Copy(enc, r) 259 return adaptWriteError(err) 260 } 261 262 // hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix 263 func hasPrefixByte(b []byte, prefix byte) bool { 264 return len(b) > 0 && b[0] == prefix 265 } 266 267 // hasPrefixFold is a case-insensitive bytes.HasPrefix 268 func hasPrefixFold(s []byte, prefix []byte) bool { 269 n := len(prefix) 270 return len(s) >= n && bytes.EqualFold(s[:n], prefix) 271 } 272 273 // indexSpaces tries to find the first all-spaces slice in the string given; 274 // the indices returned are negative when no spaces are found 275 func indexSpaces(s []byte) (i, j int) { 276 i = bytes.IndexByte(s, ' ') 277 if i < 0 { 278 return -1, -1 279 } 280 281 j = indexNonSpace(s[i:]) 282 if j < 0 { 283 return i, len(s) 284 } 285 286 j += i 287 return i, j 288 } 289 290 func indexNonSpace(s []byte) int { 291 for i, b := range s { 292 if b == ' ' { 293 continue 294 } 295 return i 296 } 297 298 return -1 299 } 300 301 func junk(w *bufio.Writer, r io.Reader, n int) error { 302 var buf [bufferSize]byte 303 304 for n > 0 { 305 got, err := rand.Read(buf[:]) 306 if err != nil { 307 return err 308 } 309 310 if got > n { 311 got = n 312 } 313 n -= got 314 315 _, err = w.Write(buf[:got]) 316 if err = adaptWriteError(err); err != nil { 317 return err 318 } 319 } 320 321 return nil 322 } 323 324 // truncateRunes ensures byte-slices can't exceed the rune-count given, unless 325 // it's negative; negative counts result in empty byte-slices 326 func truncateRunes(s []byte, max int) []byte { 327 if max < 1 { 328 return nil 329 } 330 331 if len(s) < max { 332 return s 333 } 334 335 i := 0 336 for i < len(s) && max > 0 { 337 _, size := utf8.DecodeRune(s[i:]) 338 i += size 339 max-- 340 } 341 342 return s[:i] 343 } 344 345 // reflowLine handles byte-slices for func reflow 346 func reflowLine(w *bufio.Writer, line []byte, max int) { 347 if len(line) < max { 348 w.Write(line) 349 return 350 } 351 352 n := 0 353 spaces := 0 354 355 for len(line) > 0 { 356 i, j := indexSpaces(line) 357 if i < 0 { 358 pre := utf8.RuneCount(line) 359 if n+spaces+pre > max { 360 w.WriteByte('\n') 361 n = 0 362 } 363 364 if !(n == 0 && spaces == 1) { 365 writeSpaces(w, spaces) 366 } 367 w.Write(line) 368 return 369 } 370 371 pre := utf8.RuneCount(line[:i]) 372 nspaces := j - i 373 374 if n+spaces+pre > max { 375 w.WriteByte('\n') 376 n = 0 377 } 378 379 if !(n == 0 && spaces == 1) { 380 writeSpaces(w, spaces) 381 } 382 383 w.Write(line[:i]) 384 line = line[j:] 385 n += spaces + pre 386 spaces = nspaces 387 388 if n >= max { 389 w.WriteByte('\n') 390 n = 0 391 } 392 } 393 } 394 395 func since(s, what []byte) []byte { 396 if i := bytes.Index(s, what); i >= 0 { 397 return s[i:] 398 } 399 return nil 400 } 401 402 func sinceLast(s, what []byte) []byte { 403 if i := bytes.LastIndex(s, what); i >= 0 { 404 return s[i:] 405 } 406 return nil 407 } 408 409 // squeezeChunk handles squeezing substrings between tabs for func squeeze 410 func squeezeChunk(w *bufio.Writer, chunk []byte) { 411 spaces := false 412 chunk = trimSpaces(chunk) 413 414 for len(chunk) > 0 { 415 r, size := utf8.DecodeRune(chunk) 416 417 if r == ' ' { 418 spaces = true 419 chunk = chunk[1:] 420 continue 421 } 422 423 if spaces { 424 w.WriteByte(' ') 425 spaces = false 426 } 427 428 w.WriteRune(r) 429 chunk = chunk[size:] 430 } 431 } 432 433 func stringsTool(w *bufio.Writer, r io.Reader) error { 434 ascii := false 435 var buf [bufferSize]byte 436 437 for { 438 n, err := r.Read(buf[:]) 439 if n < 1 { 440 if err == io.EOF { 441 err = nil 442 } 443 if ascii { 444 if err == nil { 445 return endLine(w) 446 } 447 endLine(w) 448 } 449 return err 450 } 451 452 for _, b := range buf[:n] { 453 if isSymbolASCII[b] { 454 ascii = true 455 w.WriteByte(b) 456 continue 457 } 458 459 if ascii { 460 ascii = false 461 if err := endLine(w); err != nil { 462 return err 463 } 464 } 465 } 466 } 467 } 468 469 func trimLead(line []byte) []byte { 470 for len(line) > 0 && line[0] == ' ' { 471 line = line[1:] 472 } 473 return line 474 } 475 476 func trimLeadWhitespace(line []byte) []byte { 477 for len(line) > 0 { 478 switch line[0] { 479 case ' ', '\t', '\r', '\n': 480 line = line[1:] 481 default: 482 return line 483 } 484 } 485 486 return line 487 } 488 489 func trimSpaces(line []byte) []byte { 490 return trimTrail(trimLead(line)) 491 } 492 493 func trimTrail(line []byte) []byte { 494 for len(line) > 0 && line[len(line)-1] == ' ' { 495 line = line[:len(line)-1] 496 } 497 return line 498 } 499 500 func unHex(w *bufio.Writer, r io.Reader) error { 501 dec := hex.NewDecoder(r) 502 return copyBytes(w, dec) 503 } 504 505 func until(s, what []byte) []byte { 506 if i := bytes.Index(s, what); i >= 0 { 507 return s[:i+len(what)] 508 } 509 return s 510 } 511 512 func untilLast(s, what []byte) []byte { 513 if i := bytes.LastIndex(s, what); i >= 0 { 514 return s[:i+len(what)] 515 } 516 return s 517 } 518 519 // vulgarize turns Latin-1-encoded bytes into UTF-8 runes; the name is taken 520 // from the word `vulgarization`, which refers to the historical evolution 521 // from latin into various modern languages over the centuries 522 func vulgarize(w *bufio.Writer, r io.Reader) error { 523 var buf [16 * kb]byte 524 br := bufio.NewReader(r) 525 526 for { 527 n, err := br.Read(buf[:]) 528 if n < 1 { 529 if err == io.EOF { 530 return nil 531 } 532 return err 533 } 534 535 for _, b := range buf[:n] { 536 r := latin1[b] 537 if r == utf8.RuneError { 538 r = 0 539 } 540 541 _, err := w.WriteRune(r) 542 if err := adaptWriteError(err); err != nil { 543 return err 544 } 545 } 546 } 547 } 548 549 // hashBytes is a general checksum-calculator-runner of input bytes 550 func hashBytes(w io.Writer, r io.Reader, h hash.Hash) error { 551 if err := copyBytes(h, r); err != nil { 552 return err 553 } 554 555 _, err := w.Write(h.Sum(nil)) 556 return adaptWriteError(err) 557 } 558 559 // loopFields is a no-allocation alterative to looping over the results of 560 // func bytes.Fields 561 func loopFields(s []byte, handle func(i int, s []byte) (keepGoing bool)) { 562 // the presence of any tab determines the line as TSV 563 if bytes.IndexByte(s, '\t') >= 0 { 564 loopTSV(s, handle) 565 return 566 } 567 568 // without any tabs, behave more like func bytes.Fields 569 s = trimSpaces(s) 570 571 for i := 0; len(s) > 0; i++ { 572 start, stop := indexSpaces(s) 573 if start < 0 { 574 handle(i, s) 575 break 576 } 577 578 if !handle(i, s[:start]) { 579 break 580 } 581 s = s[stop:] 582 } 583 } 584 585 func loopLinks(s []byte, handle func(i int, s []byte) (keepGoing bool)) { 586 for i := 0; len(s) > 0; i++ { 587 m := linksMatch.FindIndex(s) 588 if m == nil { 589 break 590 } 591 592 end := m[1] 593 if s[end] == '.' { 594 end-- 595 } 596 597 if !handle(i, s[m[0]:end]) { 598 break 599 } 600 s = s[end:] 601 } 602 } 603 604 func loopNumbers(s []byte, handle func(i int, s []byte) (keepGoing bool)) { 605 for i := 0; len(s) > 0; i++ { 606 j, k := indexNumber(s) 607 if j < 0 { 608 break 609 } 610 611 if !handle(i, s[j:k]) { 612 break 613 } 614 s = s[k:] 615 } 616 } 617 618 // loopTSV is a no-allocation alterative to looping over the results of 619 // func strings.Split 620 func loopTSV(s []byte, handle func(i int, s []byte) (keepGoing bool)) { 621 for i := 0; len(s) > 0; i++ { 622 j := bytes.IndexByte(s, '\t') 623 if j < 0 { 624 handle(i, s) 625 break 626 } 627 628 if !handle(i, s[:j]) { 629 break 630 } 631 s = s[j+1:] 632 } 633 } 634 635 func indexNumber(s []byte) (i, j int) { 636 start := bytes.IndexAny(s, `+-0123456789`) 637 if start < 0 { 638 return -1, -1 639 } 640 641 prefix := 0 642 switch s[start] { 643 case '+', '-': 644 prefix = 1 645 } 646 647 c := countLeadingDigits(s[start+prefix:]) 648 if c < 1 { 649 return -1, -1 650 } 651 652 if start+prefix+c >= len(s) { 653 return start + prefix, len(s) 654 } 655 656 if s[start+prefix+c] == '.' { 657 decs := countLeadingDigits(s[start+prefix+c+1:]) 658 if decs < 1 { 659 return start + prefix, start + prefix + c 660 } 661 return start + prefix, start + prefix + c + 1 + decs 662 } 663 664 return start + prefix, start + prefix + c 665 } 666 667 func countLeadingDigits(s []byte) int { 668 n := 0 669 670 for _, b := range s { 671 switch b { 672 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 673 n++ 674 default: 675 return n 676 } 677 } 678 679 return n 680 } 681 682 // unstyledWidth counts how many runes aren't part of ANSI-style sequences in 683 // the string given 684 func unstyledWidth(s []byte) int { 685 // n counts the number of runes which aren't part of ANSI-style sequences 686 n := 0 687 688 for len(s) > 0 { 689 i := bytes.Index(s, []byte{'\x1b', '['}) 690 if i < 0 { 691 // no more ANSI-style sequences 692 return n + utf8.RuneCount(s) 693 } 694 695 // update width-counter, and skip right past the ANSI-style intro 696 n += utf8.RuneCount(s[:i]) 697 s = s[i:] 698 699 i = indexEndANSI(s) 700 if i < 0 { 701 // no matching end of ANSI-sequence, so ignore rest of the string 702 return n 703 } 704 705 // skip right past the next `m` byte 706 s = s[i+1:] 707 } 708 709 return n 710 } 711 712 func indexEndANSI(s []byte) int { 713 for i := 0; i < len(s); i++ { 714 b := s[i] 715 716 if 'A' <= b && b <= 'H' { 717 return i 718 } 719 720 switch b { 721 case 'J', 'K', 'S', 'T', 'm': 722 return i 723 } 724 } 725 726 // no match found 727 return -1 728 } File: tu/coby.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strconv" 12 "sync" 13 ) 14 15 // cobyEvent has what the output-reporting task needs to show the results of a 16 // task which has just completed, perhaps unsuccessfully 17 type cobyEvent struct { 18 // Index points to the task's entry in the results-slice 19 Index int 20 21 // Err is the completed task's error, or lack of 22 Err error 23 } 24 25 func coby(w *bufio.Writer, r io.Reader, args []string) error { 26 // header is the first output line 27 var header = []string{ 28 `name`, 29 `bytes`, 30 `runes`, 31 `lines`, 32 `lf`, 33 `crlf`, 34 `spaces`, 35 `tabs`, 36 `trails`, 37 `nulls`, 38 `fulls`, 39 `highs`, 40 } 41 42 // show first/heading line right away, to let users know things are 43 // happening 44 for i, s := range header { 45 if i > 0 { 46 w.WriteByte('\t') 47 } 48 w.WriteString(s) 49 } 50 // assume an error means later stages/apps in a pipe had enough input and 51 // quit successfully, so quit successfully too 52 if err := endLine(w); err != nil { 53 return err 54 } 55 56 // names has all filepaths given, ignoring repetitions 57 names, ok := findAllFiles(unique(args)) 58 if !ok { 59 return errGeneric 60 } 61 if len(names) == 0 { 62 names = []string{`-`} 63 } 64 65 // results has all its items updated concurrently: this is safe to do, 66 // as the tasks update values in separate indices of this slice, and 67 // when an item is ready to show, its values aren't changing anymore 68 results := make([]cobyStats, len(names)) 69 70 events := make(chan cobyEvent) 71 go cobyHandleInputs(names, r, results, events) 72 return cobyHandleOutput(w, results, events) 73 } 74 75 // cobyHandleInputs launches all the tasks which do the actual work, limiting 76 // how many inputs are being worked on at the same time 77 func cobyHandleInputs(names []string, r io.Reader, results []cobyStats, events chan cobyEvent) { 78 // allow output-reporter task to end, and thus the app 79 defer close(events) 80 81 // permissions limits how many worker tasks can be active at the same 82 // time: when given many filepaths to work on, rate-limiting avoids 83 // a massive number of concurrent tasks which read and process input 84 permissions := make(chan struct{}, runtime.NumCPU()) 85 defer close(permissions) 86 87 var inputs sync.WaitGroup 88 for i := range names { 89 // wait until some concurrency-room is available 90 permissions <- struct{}{} 91 inputs.Add(1) 92 93 go func(i int) { 94 defer inputs.Done() 95 err := cobyHandleInput(&results[i], names[i], r) 96 events <- cobyEvent{i, err} 97 <-permissions 98 }(i) 99 } 100 101 // wait for all inputs, before closing the `events` channel 102 inputs.Wait() 103 } 104 105 // cobyHandleInput handles each work-item for func cobyHandleInputs 106 func cobyHandleInput(res *cobyStats, path string, r io.Reader) error { 107 res.name = path 108 109 if path == `-` { 110 return res.updateStats(r) 111 } 112 113 f, err := os.Open(path) 114 if err != nil { 115 res.result = resultError 116 // on windows, file-not-found error messages may mention `CreateFile`, 117 // even when trying to open files in read-only mode 118 return errors.New(`can't open file named ` + path) 119 } 120 defer f.Close() 121 122 return res.updateStats(f) 123 } 124 125 // cobyHandleOutput asynchronously updates output as results are known, whether 126 // it's errors or successful results; returns whether it succeeded, which 127 // means no errors happened 128 func cobyHandleOutput(w io.Writer, results []cobyStats, events chan cobyEvent) error { 129 ok := true 130 bw := bufio.NewWriter(w) 131 defer bw.Flush() 132 133 bw.Flush() 134 135 // keep track of which tasks are over, so that on each event all leading 136 // results which are ready are shown: all of this ensures prompt output 137 // updates as soon as results come in, while keeping the original order 138 // of the names/filepaths given 139 resultsLeft := results 140 141 for v := range events { 142 if v.Err != nil { 143 ok = false 144 bw.Flush() 145 showError(v.Err) 146 147 // stay in the current loop, in case this failure was keeping 148 // previous successes from showing up 149 } 150 151 n := countLeadingReady(resultsLeft) 152 153 for _, res := range resultsLeft[:n] { 154 if err := cobyShowResult(bw, res); err != nil { 155 // assume later stages/apps in a pipe had enough input and 156 // quit successfully, so quit successfully too 157 return nil 158 } 159 } 160 resultsLeft = resultsLeft[n:] 161 162 // flush output-buffer only if anything new was shown 163 if n > 0 { 164 bw.Flush() 165 } 166 } 167 168 if ok { 169 return nil 170 } 171 return errGeneric 172 } 173 174 // cobyShowResult does what it says 175 func cobyShowResult(w *bufio.Writer, res cobyStats) error { 176 if res.result == resultError { 177 return nil 178 } 179 180 var buf [64]byte 181 w.WriteString(res.name) 182 w.Write([]byte{'\t'}) 183 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 184 w.Write([]byte{'\t'}) 185 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 186 w.Write([]byte{'\t'}) 187 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 188 w.Write([]byte{'\t'}) 189 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 190 w.Write([]byte{'\t'}) 191 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 192 w.Write([]byte{'\t'}) 193 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 194 w.Write([]byte{'\t'}) 195 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 196 w.Write([]byte{'\t'}) 197 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 198 w.Write([]byte{'\t'}) 199 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 200 w.Write([]byte{'\t'}) 201 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 202 w.Write([]byte{'\t'}) 203 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 204 _, err := w.Write([]byte{'\n'}) 205 return err 206 } 207 208 // unique ensures items only appear once in the result, keeping the original 209 // slice unchanged 210 func unique(src []string) []string { 211 var unique []string 212 got := make(map[string]struct{}) 213 for _, s := range src { 214 if _, ok := got[s]; ok { 215 continue 216 } 217 unique = append(unique, s) 218 got[s] = struct{}{} 219 } 220 return unique 221 } 222 223 // findAllFiles does what it says, given a mix of file/folder paths, finding 224 // all files recursively in the case of folders 225 func findAllFiles(paths []string) (found []string, ok bool) { 226 var unique []string 227 got := make(map[string]struct{}) 228 ok = true 229 230 for _, root := range paths { 231 // a dash means standard input 232 if root == `-` { 233 if _, ok := got[root]; ok { 234 continue 235 } 236 237 unique = append(unique, root) 238 got[root] = struct{}{} 239 continue 240 } 241 242 _, err := os.Stat(root) 243 if os.IsNotExist(err) { 244 ok = false 245 // on windows, file-not-found error messages may mention `CreateFile`, 246 // even when trying to open files in read-only mode 247 err := errors.New(`can't find file/folder named ` + root) 248 showError(err) 249 continue 250 } 251 252 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 253 if err != nil { 254 return err 255 } 256 257 if d.IsDir() { 258 return nil 259 } 260 261 if _, ok := got[path]; ok { 262 return nil 263 } 264 265 unique = append(unique, path) 266 got[path] = struct{}{} 267 return nil 268 }) 269 270 if err != nil { 271 ok = false 272 showError(err) 273 } 274 } 275 276 return unique, ok 277 } 278 279 // isZero enables branchless-counting, when xor-compared bytes are used 280 // as indices for it 281 var isZero = [256]byte{1} 282 283 // counter makes it easy to change the int-size of almost all counters 284 type counter int 285 286 // statResult constrains possible result-states/values in type stats 287 type statResult int 288 289 const ( 290 // resultPending is the default not-yet-ready result-status 291 resultPending = statResult(0) 292 293 // resultError signals result should show as an error, instead of data 294 resultError = statResult(1) 295 296 // resultSuccess means result can be shown 297 resultSuccess = statResult(2) 298 ) 299 300 // cobyStats has all the size-stats for some input, as well as a way to 301 // skip showing results, in case of an error such as `file not found` 302 type cobyStats struct { 303 // bytes counts all bytes read 304 bytes int 305 306 // lines counts lines, and is 0 only when the byte-count is also 0 307 lines counter 308 309 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 310 // is usually a complete symbol: `emoji` country-flags are commonly-used 311 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 312 runes counter 313 314 // maxWidth is maximum byte-width of lines, excluding carriage-returns 315 // and/or line-feeds 316 maxWidth counter 317 318 // nulls counts all-bits-off bytes 319 nulls counter 320 321 // fulls counts all-bits-on bytes 322 fulls counter 323 324 // highs counts bytes with their `top` (highest-order) bit on 325 highs counter 326 327 // spaces counts ASCII spaces 328 spaces counter 329 330 // tabs counts ASCII tabs 331 tabs counter 332 333 // trailing counts lines with trailing spaces in them 334 trailing counter 335 336 // lf counts ASCII line-feeds as their own byte-values: this means its 337 // value will always be at least the same as field `crlf` 338 lf counter 339 340 // crlf counts ASCII CRLF byte-pairs 341 crlf counter 342 343 // name is the filepath of the file/source these stats are about 344 name string 345 346 // results keeps track of whether results are valid and/or ready 347 result statResult 348 } 349 350 // updateStats does what it says, reading everything from a reader 351 func (res *cobyStats) updateStats(r io.Reader) error { 352 err := res.updateUsing(r) 353 if err == io.EOF { 354 err = nil 355 } 356 357 if err == nil { 358 res.result = resultSuccess 359 } else { 360 res.result = resultError 361 } 362 return err 363 } 364 365 // updateUsing helps func updateStats do its job 366 func (res *cobyStats) updateUsing(r io.Reader) error { 367 var width counter 368 var highRun int 369 var prev1, prev2 byte 370 var buf [16 * 1024]byte 371 var tallies [256]uint64 372 373 for { 374 n, err := r.Read(buf[:]) 375 if n < 1 { 376 if err == io.EOF { 377 res.tabs = counter(tallies['\t']) 378 res.spaces = counter(tallies[' ']) 379 res.lf = counter(tallies['\n']) 380 res.nulls = counter(tallies[0]) 381 res.fulls = counter(tallies[255]) 382 for i := 128; i < 256; i++ { 383 res.highs += counter(tallies[i]) 384 } 385 return res.handleEnd(width, prev1, highRun) 386 } 387 return err 388 } 389 390 res.bytes += n 391 chunk := buf[:n] 392 393 for _, b := range chunk { 394 // count values without branching, because it's fun 395 tallies[b]++ 396 397 // handle non-ASCII runes, assuming input is valid UTF-8 398 if b >= 128 { 399 if highRun < 3 { 400 highRun++ 401 } else { 402 highRun = 0 403 res.runes++ 404 width++ 405 } 406 407 prev2 = prev1 408 prev1 = b 409 continue 410 } 411 412 // handle line-feeds 413 if b == '\n' { 414 res.lines++ 415 416 crlf := count(prev1, '\r') 417 res.crlf += crlf 418 419 // count lines with trailing spaces, whether these end with 420 // a CRLF byte-pair or just a line-feed byte 421 res.trailing += count(prev1, ' ') 422 res.trailing += crlf & count(prev2, ' ') 423 424 // exclude any CR from the current line's width-count 425 width -= crlf 426 if res.maxWidth < width { 427 res.maxWidth = width 428 } 429 430 prev2 = prev1 431 prev1 = b 432 433 res.runes++ 434 highRun = 0 435 width = 0 436 continue 437 } 438 439 prev2 = prev1 440 prev1 = b 441 442 res.runes++ 443 highRun = 0 444 width++ 445 } 446 } 447 } 448 449 // handleEnd fixes/finalizes stats when input data end; this func is only 450 // meant to be used by func updateStats, since it takes some of the latter's 451 // local variables 452 func (res *cobyStats) handleEnd(width counter, prev1 byte, highRun int) error { 453 if prev1 == ' ' { 454 res.trailing++ 455 } 456 457 if res.maxWidth < width { 458 res.maxWidth = width 459 } 460 461 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 462 // standard cmd-line tool `wc` 463 if res.bytes > 0 && prev1 != '\n' { 464 res.lines++ 465 } 466 467 if highRun > 0 { 468 res.runes++ 469 } 470 return nil 471 } 472 473 // count checks if 2 bytes are the same, returning either 0 or 1, which can 474 // be added directly/branchlessly to totals 475 func count(x, y byte) counter { 476 return counter(isZero[x^y]) 477 } 478 479 // countLeadingReady finds how many items are ready to show at the start of a 480 // results-slice, which ensures output matches the original item-order 481 func countLeadingReady(values []cobyStats) int { 482 for i, v := range values { 483 if v.result == resultPending { 484 return i 485 } 486 } 487 return len(values) 488 } File: tu/coby_test.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]cobyStats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected cobyStats 48 }{ 49 { 50 ``, 51 cobyStats{}, 52 }, 53 { 54 `abc`, 55 cobyStats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 cobyStats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 cobyStats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 cobyStats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got cobyStats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 } File: tu/files.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "io/fs" 9 "os" 10 "path/filepath" 11 "runtime" 12 "sort" 13 "strings" 14 "sync" 15 ) 16 17 // walkFolders is a general multi-folder recursion handler, which standardizes 18 // such behavior across multiple filesystem-related tools 19 func walkFolders(top []string, fn func(path string, info fs.DirEntry) error) error { 20 if len(top) == 0 { 21 return filepath.WalkDir(`.`, func(p string, d fs.DirEntry, e error) error { 22 if e != nil { 23 return e 24 } 25 return fn(p, d) 26 }) 27 } 28 29 for _, path := range top { 30 err := filepath.WalkDir(path, func(p string, d fs.DirEntry, e error) error { 31 if e != nil { 32 return e 33 } 34 return fn(p, d) 35 }) 36 37 if err != nil { 38 return err 39 } 40 } 41 42 return nil 43 } 44 45 // allFiles finds all files recursively, from the top-level folders given 46 func allFiles(w *bufio.Writer, r io.Reader, names []string) error { 47 return walkFolders(names, func(path string, info fs.DirEntry) error { 48 if info.IsDir() { 49 return nil 50 } 51 w.WriteString(unixSlashes(path)) 52 return endLine(w) 53 }) 54 } 55 56 // allFolders finds all folders recursively, from the top-level folders given 57 func allFolders(w *bufio.Writer, r io.Reader, names []string) error { 58 return walkFolders(names, func(path string, info fs.DirEntry) error { 59 if !info.IsDir() { 60 return nil 61 } 62 63 s := unixSlashes(path) 64 w.WriteString(s) 65 // ensure folder paths end with a slash 66 if !strings.HasSuffix(s, `/`) { 67 w.WriteString(`/`) 68 } 69 return endLine(w) 70 }) 71 } 72 73 // bigFiles finds all files which have at least the number of bytes given 74 // recursively, from all the top-folders given 75 func bigFiles(w *bufio.Writer, r io.Reader, args []string) error { 76 if len(args) > 2 { 77 return errors.New(`only up to 2 args are allowed`) 78 } 79 80 n, err := demandIntegerArg(args) 81 if err != nil { 82 return err 83 } 84 min := int64(n) 85 86 return walkFolders(args[1:], func(path string, info fs.DirEntry) error { 87 if info.IsDir() { 88 return nil 89 } 90 91 st, err := info.Info() 92 if err != nil { 93 return err 94 } 95 if st.Size() < min { 96 return nil 97 } 98 99 w.WriteString(unixSlashes(path)) 100 return endLine(w) 101 }) 102 } 103 104 // emptyFiles finds all empty files recursively, from all the top-folders given 105 func emptyFiles(w *bufio.Writer, r io.Reader, names []string) error { 106 return walkFolders(names, func(path string, info fs.DirEntry) error { 107 if info.IsDir() { 108 return nil 109 } 110 111 st, err := info.Info() 112 if err != nil { 113 return err 114 } 115 if st.Size() > 0 { 116 return nil 117 } 118 119 w.WriteString(unixSlashes(path)) 120 return endLine(w) 121 }) 122 } 123 124 // emptyFolders finds all empty folders recursively, from all the top-folders 125 // given 126 func emptyFolders(w *bufio.Writer, r io.Reader, names []string) error { 127 return walkFolders(names, func(path string, info fs.DirEntry) error { 128 if !info.IsDir() { 129 return nil 130 } 131 132 ent, err := os.ReadDir(path) 133 if err != nil { 134 return err 135 } 136 137 if len(ent) > 0 { 138 return nil 139 } 140 141 w.WriteString(unixSlashes(path)) 142 return endLine(w) 143 }) 144 } 145 146 // groupByFileSize does what it says, emitting paragraphs, where each first 147 // line is the file-size, followed by all file-paths of that size 148 func groupByFileSize(w *bufio.Writer, r io.Reader, names []string) error { 149 bySize := make(map[int64][]string) 150 151 err := walkFolders(names, func(path string, info fs.DirEntry) error { 152 if info.IsDir() { 153 return nil 154 } 155 156 st, err := info.Info() 157 if err != nil { 158 return err 159 } 160 161 n := st.Size() 162 bySize[n] = append(bySize[n], path) 163 return nil 164 }) 165 166 if err != nil { 167 return err 168 } 169 170 // reverse-sorted keys 171 sizes := make([]int64, 0, len(bySize)) 172 for k := range bySize { 173 sizes = append(sizes, k) 174 } 175 sort.Slice(sizes, func(i, j int) bool { 176 return sizes[i] > sizes[j] 177 }) 178 179 for i, sz := range sizes { 180 if i > 0 { 181 err := endLine(w) 182 if err != nil { 183 return err 184 } 185 } 186 187 writeInt64(w, sz) 188 if err := endLine(w); err != nil { 189 return err 190 } 191 192 names := bySize[sz] 193 sort.Strings(names) 194 195 for _, s := range names { 196 w.WriteString(unixSlashes(s)) 197 if err := endLine(w); err != nil { 198 return err 199 } 200 } 201 } 202 203 return nil 204 } 205 206 // smallFiles finds all files which have fewer than the number of bytes given 207 // recursively, from all the top-folders given 208 func smallFiles(w *bufio.Writer, r io.Reader, args []string) error { 209 if len(args) > 2 { 210 return errors.New(`only up to 2 args are allowed`) 211 } 212 213 n, err := demandIntegerArg(args) 214 if err != nil { 215 return err 216 } 217 maxp1 := int64(n) 218 219 return walkFolders(args[1:], func(path string, info fs.DirEntry) error { 220 if info.IsDir() { 221 return nil 222 } 223 224 st, err := info.Info() 225 if err != nil { 226 return err 227 } 228 if st.Size() >= maxp1 { 229 return nil 230 } 231 232 w.WriteString(unixSlashes(path)) 233 return endLine(w) 234 }) 235 } 236 237 // topFiles finds all top-level files, from the folders given 238 func topFiles(w *bufio.Writer, r io.Reader, names []string) error { 239 for _, path := range names { 240 entries, err := os.ReadDir(path) 241 if err != nil { 242 return err 243 } 244 245 // ensure path is usable for unix-style full-path output 246 path = strings.TrimSuffix(path, `/`) 247 path = strings.TrimSuffix(path, `\`) 248 path = unixSlashes(path) 249 250 for _, e := range entries { 251 if e.IsDir() { 252 continue 253 } 254 255 w.WriteString(path) 256 w.WriteString(`/`) 257 w.WriteString(e.Name()) 258 err := endLine(w) 259 if err != nil { 260 return err 261 } 262 } 263 } 264 265 return nil 266 } 267 268 // topFolders finds all top-level folders, from the folders given 269 func topFolders(w *bufio.Writer, r io.Reader, names []string) error { 270 for _, path := range names { 271 entries, err := os.ReadDir(path) 272 if err != nil { 273 return err 274 } 275 276 // ensure path is usable for unix-style full-path output 277 path = strings.TrimSuffix(path, `/`) 278 path = strings.TrimSuffix(path, `\`) 279 path = unixSlashes(path) 280 281 for _, e := range entries { 282 if !e.IsDir() { 283 continue 284 } 285 286 w.WriteString(path) 287 w.WriteString(`/`) 288 w.WriteString(e.Name()) 289 err := endLine(w) 290 if err != nil { 291 return err 292 } 293 } 294 } 295 296 return nil 297 } 298 299 // sourceStats keeps track of all sorts of filetype-related source-code info 300 type sourceStats struct { 301 // files counts how many files were used for these stats 302 files int 303 304 // bytes counts bytes for all files 305 bytes int64 306 307 // lines counts all lines 308 lines int 309 310 // code counts non-empty non-comment lines 311 code int 312 313 // empty counts empty lines 314 empty int 315 316 // comments counts lines part of multi-line comments, as well 317 // as lines which are just single-line comments 318 comments int 319 } 320 321 // sourceSettings is used to deal with source-code-related stats, and holds 322 // all sorts of looked-up info about filetypes, as part of an internal table 323 type sourceSettings struct { 324 // kind is the full language name, as opposed to its file extension 325 kind string 326 327 // lineComment is the string which starts a comment lasting for the rest 328 // of its line 329 lineComment string 330 331 // startComment is the string which starts a potentially multi-line comment 332 startComment string 333 334 // endComment is what ends a potentially multi-line comment 335 endComment string 336 } 337 338 // update handles a source-code input line, and must be given a pointer to a 339 // boolean which keeps track of multi-line-comment status across input lines 340 func (stats *sourceStats) update(line string, ss sourceSettings, mlComment *bool) error { 341 // count all lines in general 342 stats.lines++ 343 344 // handle she-bang-style first lines for `commentable` file types 345 if stats.lines == 1 && ss.lineComment != `` && strings.HasPrefix(line, `#!`) { 346 stats.comments++ 347 return nil 348 } 349 350 // handle rest of multi-line comments 351 if *mlComment { 352 stats.comments++ 353 if strings.Contains(line, ss.endComment) { 354 *mlComment = false 355 } 356 return nil 357 } 358 359 tr := strings.TrimSpace(line) 360 361 // count empty(ish) lines 362 if tr == `` { 363 stats.empty++ 364 return nil 365 } 366 367 // handle lines with just single-line comments on them 368 if ss.lineComment != `` && strings.HasPrefix(tr, ss.lineComment) { 369 stats.comments++ 370 return nil 371 } 372 373 // handle the start of multi-line comments, and perhaps their 374 // end, when that's on the same line 375 if ss.startComment != `` && strings.HasPrefix(tr, ss.startComment) { 376 stats.comments++ 377 *mlComment = true 378 if strings.Contains(tr, ss.endComment) { 379 *mlComment = false 380 } 381 return nil 382 } 383 384 // count regular lines of code 385 stats.code++ 386 return nil 387 } 388 389 // add updates existing source-stats results with those from the next file 390 func (ss sourceStats) add(v sourceStats) sourceStats { 391 ss.files++ 392 ss.bytes += v.bytes 393 ss.lines += v.lines 394 ss.code += v.code 395 ss.empty += v.empty 396 ss.comments += v.comments 397 return ss 398 } 399 400 var ext2set = map[string]sourceSettings{ 401 `.awk`: {`awk`, `#`, ``, ``}, 402 `.bash`: {`shell`, `#`, ``, ``}, 403 `.c`: {`c/c++`, `//`, `/*`, `*/`}, 404 `.cc`: {`c/c++`, `//`, `/*`, `*/`}, 405 `.cpp`: {`c/c++`, `//`, `/*`, `*/`}, 406 `.css`: {`css`, ``, `/*`, `*/`}, 407 `.csv`: {`csv`, ``, ``, ``}, 408 `.cxx`: {`c/c++`, `//`, `/*`, `*/`}, 409 `.go`: {`go`, `//`, `/*`, `*/`}, 410 `.h`: {`c/c++`, `//`, `/*`, `*/`}, 411 `.hh`: {`c/c++`, `//`, `/*`, `*/`}, 412 `.hpp`: {`c/c++`, `//`, `/*`, `*/`}, 413 `.htm`: {`html`, ``, `<!--`, `-->`}, 414 `.html`: {`html`, ``, `<!--`, `-->`}, 415 `.hxx`: {`c/c++`, `//`, `/*`, `*/`}, 416 `.java`: {`java`, `//`, `/*`, `*/`}, 417 `.js`: {`javascript`, `//`, `/*`, `*/`}, 418 `.json`: {`json`, `//`, ``, ``}, 419 `.lua`: {`lua`, `--`, ``, ``}, 420 `.md`: {`markdown`, ``, ``, ``}, 421 `.py`: {`python`, `#`, ``, ``}, 422 `.pyw`: {`python`, `#`, ``, ``}, 423 `.rb`: {`ruby`, `#`, ``, ``}, 424 `.rs`: {`rust`, `//`, `/*`, `*/`}, 425 `.sh`: {`shell`, `#`, ``, ``}, 426 `.svg`: {`svg`, ``, `<!--`, `-->`}, 427 `.text`: {`text`, ``, ``, ``}, 428 `.ts`: {`typescript`, `//`, `/*`, `*/`}, 429 `.tsv`: {`tsv`, ``, ``, ``}, 430 `.txt`: {`text`, ``, ``, ``}, 431 } 432 433 // locTool directly implements the `loc` tool 434 func locTool(w *bufio.Writer, r io.Reader, names []string) error { 435 if len(names) == 0 { 436 names = []string{`.`} 437 } 438 439 var mut sync.Mutex 440 res := make(map[string]sourceStats) 441 442 max := runtime.NumCPU() 443 permissions := make(chan struct{}, max) 444 defer close(permissions) 445 446 err := walkFolders(names, func(path string, info fs.DirEntry) error { 447 ext := filepath.Ext(path) 448 ext = strings.ToLower(ext) 449 ss, ok := ext2set[ext] 450 if !ok { 451 return nil 452 } 453 454 permissions <- struct{}{} 455 defer func() { <-permissions }() 456 stats, err := locFile(path) 457 if err != nil { 458 return err 459 } 460 461 // update stats for the filetype 462 mut.Lock() 463 defer mut.Unlock() 464 res[ss.kind] = res[ss.kind].add(stats) 465 return nil 466 }) 467 468 if err != nil { 469 return err 470 } 471 472 keys := make([]string, 0, len(res)) 473 for k := range res { 474 keys = append(keys, k) 475 } 476 477 // reverse-sort keys by total lines, from most to least 478 sort.SliceStable(keys, func(i, j int) bool { 479 return res[keys[i]].lines > res[keys[j]].lines 480 }) 481 482 // emit results as TSV lines 483 w.WriteString("type\tfiles\tlines\tcode\tcomments\tempty\tbytes") 484 if err := endLine(w); err != nil { 485 return err 486 } 487 488 for _, k := range keys { 489 v := res[k] 490 fmt.Fprintf(w, "%s\t%d\t%d\t%d\t%d\t%d\t%d", 491 k, v.files, v.lines, v.code, v.comments, v.empty, v.bytes) 492 493 if err := endLine(w); err != nil { 494 return err 495 } 496 } 497 498 return nil 499 } 500 501 func locFile(path string) (sourceStats, error) { 502 var stats sourceStats 503 504 // find filetype, avoiding files without a supported extension 505 ext := filepath.Ext(path) 506 ext = strings.ToLower(ext) 507 ss, ok := ext2set[ext] 508 if !ok { 509 return stats, nil 510 } 511 512 // get the file-size ready to use in the result stats 513 st, err := os.Stat(path) 514 if err != nil { 515 return stats, err 516 } 517 518 // gather source-code-related stats 519 mlComment := false 520 stats.bytes = st.Size() 521 err = handleNamedInput(path, nil, func(r io.Reader) error { 522 return loopLinesString(r, func(line string) error { 523 return stats.update(line, ss, &mlComment) 524 }) 525 }) 526 return stats, err 527 } File: tu/filetypes.go 1 package main 2 3 import "bytes" 4 5 // all the MIME types used/recognized in this package 6 const ( 7 aiff = `audio/aiff` 8 au = `audio/basic` 9 avi = `video/avi` 10 avif = `image/avif` 11 bmp = `image/x-bmp` 12 caf = `audio/x-caf` 13 cur = `image/vnd.microsoft.icon` 14 css = `text/css` 15 csv_ = `text/csv` 16 djvu = `image/x-djvu` 17 elf = `application/x-elf` 18 exe = `application/vnd.microsoft.portable-executable` 19 flac = `audio/x-flac` 20 gif = `image/gif` 21 gz = `application/gzip` 22 heic = `image/heic` 23 htm = `text/html` 24 html = `text/html` 25 ico = `image/x-icon` 26 iso = `application/octet-stream` 27 jpg = `image/jpeg` 28 jpeg = `image/jpeg` 29 js = `application/javascript` 30 json = `application/json` 31 m4a = `audio/aac` 32 m4v = `video/x-m4v` 33 mid = `audio/midi` 34 mov = `video/quicktime` 35 mp4 = `video/mp4` 36 mp3 = `audio/mpeg` 37 mpg = `video/mpeg` 38 ogg = `audio/ogg` 39 opus = `audio/opus` 40 pdf = `application/pdf` 41 png = `image/png` 42 ps = `application/postscript` 43 psd = `image/vnd.adobe.photoshop` 44 rtf = `application/rtf` 45 sqlite3 = `application/x-sqlite3` 46 svg = `image/svg+xml` 47 text = `text/plain` 48 tiff = `image/tiff` 49 tsv = `text/tsv` 50 wasm = `application/wasm` 51 wav = `audio/x-wav` 52 webp = `image/webp` 53 webm = `video/webm` 54 xml = `application/xml` 55 zip = `application/zip` 56 zst = `application/zstd` 57 ) 58 59 // formatDescriptor ties a file-header pattern to its data-format type 60 type formatDescriptor struct { 61 Header []byte 62 Type string 63 } 64 65 // can be anything: ensure this value differs from all other literal bytes 66 // in the generic-headers table: failing that, its value could cause subtle 67 // type-misdetection bugs 68 const cba = 0xFD // 253, which is > 127, the highest-valued ascii symbol 69 70 // dash-streamed m4a format 71 var m4aDash = []byte{ 72 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 73 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 74 } 75 76 // format markers with leading wildcards, which should be checked before the 77 // normal ones: this is to prevent mismatches with the latter types, even 78 // though you can make probabilistic arguments which suggest these mismatches 79 // should be very unlikely in practice 80 var specialHeaders = []formatDescriptor{ 81 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 82 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 83 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 84 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 85 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 86 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 87 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 88 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 89 {m4aDash, m4a}, 90 } 91 92 // sqlite3 database format 93 var sqlite3db = []byte{ 94 'S', 'Q', 'L', 'i', 't', 'e', ' ', 95 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 96 000, 97 } 98 99 // windows-variant bitmap file-header, which is followed by a byte-counter for 100 // the 40-byte infoheader which follows that 101 var winbmp = []byte{ 102 'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40, 103 } 104 105 // deja-vu document format 106 var djv = []byte{ 107 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V', 108 } 109 110 var doctypeHTML = []byte{ 111 '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', '>', 112 } 113 114 // hdrDispatch groups format-description-groups by their first byte, thus 115 // shortening total lookups for some data header: notice how the `ftyp` data 116 // formats aren't handled here, since these can start with any byte, instead 117 // of the literal value of the any-byte markers they use 118 var hdrDispatch = [256][]formatDescriptor{ 119 { 120 {[]byte{000, 000, 001, 0xBA}, mpg}, 121 {[]byte{000, 000, 001, 0xB3}, mpg}, 122 {[]byte{000, 000, 001, 000}, ico}, 123 {[]byte{000, 000, 002, 000}, cur}, 124 {[]byte{000, 'a', 's', 'm'}, wasm}, 125 }, // 0 126 nil, // 1 127 nil, // 2 128 nil, // 3 129 nil, // 4 130 nil, // 5 131 nil, // 6 132 nil, // 7 133 nil, // 8 134 nil, // 9 135 nil, // 10 136 nil, // 11 137 nil, // 12 138 nil, // 13 139 nil, // 14 140 nil, // 15 141 nil, // 16 142 nil, // 17 143 nil, // 18 144 nil, // 19 145 nil, // 20 146 nil, // 21 147 nil, // 22 148 nil, // 23 149 nil, // 24 150 nil, // 25 151 { 152 {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm}, 153 }, // 26 154 nil, // 27 155 nil, // 28 156 nil, // 29 157 nil, // 30 158 { 159 // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz}, 160 {[]byte{0x1F, 0x8B, 0x08}, gz}, 161 }, // 31 162 nil, // 32 163 nil, // 33 ! 164 nil, // 34 " 165 { 166 {[]byte{'#', '!', ' '}, text}, 167 {[]byte{'#', '!', '/'}, text}, 168 }, // 35 # 169 nil, // 36 $ 170 { 171 {[]byte{'%', 'P', 'D', 'F'}, pdf}, 172 {[]byte{'%', '!', 'P', 'S'}, ps}, 173 }, // 37 % 174 nil, // 38 & 175 nil, // 39 ' 176 { 177 {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst}, 178 }, // 40 ( 179 nil, // 41 ) 180 nil, // 42 * 181 nil, // 43 + 182 nil, // 44 , 183 nil, // 45 - 184 { 185 {[]byte{'.', 's', 'n', 'd'}, au}, 186 }, // 46 . 187 nil, // 47 / 188 nil, // 48 0 189 nil, // 49 1 190 nil, // 50 2 191 nil, // 51 3 192 nil, // 52 4 193 nil, // 53 5 194 nil, // 54 6 195 nil, // 55 7 196 { 197 {[]byte{'8', 'B', 'P', 'S'}, psd}, 198 }, // 56 8 199 nil, // 57 9 200 nil, // 58 : 201 nil, // 59 ; 202 { 203 // func checkDoc is better for these, since it's case-insensitive 204 {doctypeHTML, html}, 205 {[]byte{'<', 's', 'v', 'g'}, svg}, 206 {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html}, 207 {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html}, 208 {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html}, 209 {[]byte{'<', '?', 'x', 'm', 'l'}, xml}, 210 }, // 60 < 211 nil, // 61 = 212 nil, // 62 > 213 nil, // 63 ? 214 nil, // 64 @ 215 { 216 {djv, djvu}, 217 }, // 65 A 218 { 219 {winbmp, bmp}, 220 }, // 66 B 221 nil, // 67 C 222 nil, // 68 D 223 nil, // 69 E 224 { 225 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 226 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 227 }, // 70 F 228 { 229 {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif}, 230 {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif}, 231 }, // 71 G 232 nil, // 72 H 233 { 234 {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata 235 {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata 236 {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata 237 {[]byte{'I', 'I', '*', 000}, tiff}, 238 }, // 73 I 239 nil, // 74 J 240 nil, // 75 K 241 nil, // 76 L 242 { 243 {[]byte{'M', 'M', 000, '*'}, tiff}, 244 {[]byte{'M', 'T', 'h', 'd'}, mid}, 245 {[]byte{'M', 'Z', cba, 000, cba, 000}, exe}, 246 // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe}, 247 // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe}, 248 // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe}, 249 }, // 77 M 250 nil, // 78 N 251 { 252 {[]byte{'O', 'g', 'g', 'S'}, ogg}, 253 }, // 79 O 254 { 255 {[]byte{'P', 'K', 003, 004}, zip}, 256 }, // 80 P 257 nil, // 81 Q 258 { 259 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 260 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 261 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 262 }, // 82 R 263 { 264 {sqlite3db, sqlite3}, 265 }, // 83 S 266 nil, // 84 T 267 nil, // 85 U 268 nil, // 86 V 269 nil, // 87 W 270 nil, // 88 X 271 nil, // 89 Y 272 nil, // 90 Z 273 nil, // 91 [ 274 nil, // 92 \ 275 nil, // 93 ] 276 nil, // 94 ^ 277 nil, // 95 _ 278 nil, // 96 ` 279 nil, // 97 a 280 nil, // 98 b 281 { 282 {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 283 }, // 99 c 284 nil, // 100 d 285 nil, // 101 e 286 { 287 {[]byte{'f', 'L', 'a', 'C'}, flac}, 288 }, // 102 f 289 nil, // 103 g 290 nil, // 104 h 291 nil, // 105 i 292 nil, // 106 j 293 nil, // 107 k 294 nil, // 108 l 295 nil, // 109 m 296 nil, // 110 n 297 nil, // 111 o 298 nil, // 112 p 299 nil, // 113 q 300 nil, // 114 r 301 nil, // 115 s 302 nil, // 116 t 303 nil, // 117 u 304 nil, // 118 v 305 nil, // 119 w 306 nil, // 120 x 307 nil, // 121 y 308 nil, // 122 z 309 { 310 {[]byte{'{', '\\', 'r', 't', 'f'}, rtf}, 311 }, // 123 { 312 nil, // 124 | 313 nil, // 125 } 314 nil, // 126 315 { 316 {[]byte{127, 'E', 'L', 'F'}, elf}, 317 }, // 127 318 nil, // 128 319 nil, // 129 320 nil, // 130 321 nil, // 131 322 nil, // 132 323 nil, // 133 324 nil, // 134 325 nil, // 135 326 nil, // 136 327 { 328 {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 329 }, // 137 330 nil, // 138 331 nil, // 139 332 nil, // 140 333 nil, // 141 334 nil, // 142 335 nil, // 143 336 nil, // 144 337 nil, // 145 338 nil, // 146 339 nil, // 147 340 nil, // 148 341 nil, // 149 342 nil, // 150 343 nil, // 151 344 nil, // 152 345 nil, // 153 346 nil, // 154 347 nil, // 155 348 nil, // 156 349 nil, // 157 350 nil, // 158 351 nil, // 159 352 nil, // 160 353 nil, // 161 354 nil, // 162 355 nil, // 163 356 nil, // 164 357 nil, // 165 358 nil, // 166 359 nil, // 167 360 nil, // 168 361 nil, // 169 362 nil, // 170 363 nil, // 171 364 nil, // 172 365 nil, // 173 366 nil, // 174 367 nil, // 175 368 nil, // 176 369 nil, // 177 370 nil, // 178 371 nil, // 179 372 nil, // 180 373 nil, // 181 374 nil, // 182 375 nil, // 183 376 nil, // 184 377 nil, // 185 378 nil, // 186 379 nil, // 187 380 nil, // 188 381 nil, // 189 382 nil, // 190 383 nil, // 191 384 nil, // 192 385 nil, // 193 386 nil, // 194 387 nil, // 195 388 nil, // 196 389 nil, // 197 390 nil, // 198 391 nil, // 199 392 nil, // 200 393 nil, // 201 394 nil, // 202 395 nil, // 203 396 nil, // 204 397 nil, // 205 398 nil, // 206 399 nil, // 207 400 nil, // 208 401 nil, // 209 402 nil, // 210 403 nil, // 211 404 nil, // 212 405 nil, // 213 406 nil, // 214 407 nil, // 215 408 nil, // 216 409 nil, // 217 410 nil, // 218 411 nil, // 219 412 nil, // 220 413 nil, // 221 414 nil, // 222 415 nil, // 223 416 nil, // 224 417 nil, // 225 418 nil, // 226 419 nil, // 227 420 nil, // 228 421 nil, // 229 422 nil, // 230 423 nil, // 231 424 nil, // 232 425 nil, // 233 426 nil, // 234 427 nil, // 235 428 nil, // 236 429 nil, // 237 430 nil, // 238 431 nil, // 239 432 nil, // 240 433 nil, // 241 434 nil, // 242 435 nil, // 243 436 nil, // 244 437 nil, // 245 438 nil, // 246 439 nil, // 247 440 nil, // 248 441 nil, // 249 442 nil, // 250 443 nil, // 251 444 nil, // 252 445 nil, // 253 446 nil, // 254 447 { 448 {[]byte{0xFF, 0xD8, 0xFF}, jpg}, 449 {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 450 {[]byte{0xFF, 0xFB}, mp3}, 451 }, // 255 452 } 453 454 // detectMIME guesses the first appropriate MIME type from the first few 455 // data bytes given: 24 bytes are enough to detect all supported types 456 func detectMIME(b []byte) (mimeType string, ok bool) { 457 t, ok := detectType(b) 458 if ok { 459 return t, true 460 } 461 return ``, false 462 } 463 464 // detectType guesses the first appropriate file type for the data given: 465 // here the type is a a filename extension without the leading dot 466 func detectType(b []byte) (dotlessExt string, ok bool) { 467 // empty data, so there's no way to detect anything 468 if len(b) == 0 { 469 return ``, false 470 } 471 472 // check for plain-text web-document formats case-insensitively 473 kind, ok := checkDoc(b) 474 if ok { 475 return kind, true 476 } 477 478 // check data formats which allow any byte at the start 479 kind, ok = checkSpecial(b) 480 if ok { 481 return kind, true 482 } 483 484 // check all other supported data formats 485 headers := hdrDispatch[b[0]] 486 for _, t := range headers { 487 if hasPrefixPattern(b[1:], t.Header[1:], cba) { 488 return t.Type, true 489 } 490 } 491 492 // unrecognized data format 493 return ``, false 494 } 495 496 // checkDoc tries to guess if the bytes given are the start of HTML, SVG, 497 // XML, or JSON data 498 func checkDoc(b []byte) (kind string, ok bool) { 499 // ignore leading whitespaces 500 b = trimLeadWhitespace(b) 501 502 // can't detect anything with empty data 503 if len(b) == 0 { 504 return ``, false 505 } 506 507 // handle HTML/SVG/XML documents 508 if hasPrefixByte(b, '<') { 509 if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) { 510 if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) { 511 return svg, true 512 } 513 return xml, true 514 } 515 516 headers := hdrDispatch['<'] 517 for _, v := range headers { 518 if hasPrefixFold(b, v.Header) { 519 return v.Type, true 520 } 521 } 522 return ``, false 523 } 524 525 // handle JSON with top-level arrays 526 if hasPrefixByte(b, '[') { 527 // match [", or [[, or [{, ignoring spaces between 528 b = trimLeadWhitespace(b[1:]) 529 if len(b) > 0 { 530 switch b[0] { 531 case '"', '[', '{': 532 return json, true 533 } 534 } 535 return ``, false 536 } 537 538 // handle JSON with top-level objects 539 if hasPrefixByte(b, '{') { 540 // match {", ignoring spaces between: after {, the only valid syntax 541 // which can follow is the opening quote for the expected object-key 542 b = trimLeadWhitespace(b[1:]) 543 if hasPrefixByte(b, '"') { 544 return json, true 545 } 546 return ``, false 547 } 548 549 // checking for a quoted string, any of the JSON keywords, or even a 550 // number seems too ambiguous to declare the data valid JSON 551 552 // no web-document format detected 553 return ``, false 554 } 555 556 // checkSpecial handles special file-format headers, which should be checked 557 // before the normal file-type headers, since the first-byte dispatch algo 558 // doesn't work for these 559 func checkSpecial(b []byte) (kind string, ok bool) { 560 if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 { 561 for _, t := range specialHeaders { 562 if hasPrefixPattern(b[4:], t.Header[4:], cba) { 563 return t.Type, true 564 } 565 } 566 } 567 return ``, false 568 } 569 570 // hasPrefixPattern works like bytes.HasPrefix, except it allows for a special 571 // value to signal any byte is allowed on specific spots 572 func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool { 573 // if the data are shorter than the pattern to match, there's no match 574 if len(what) < len(pat) { 575 return false 576 } 577 578 // use a slice which ensures the pattern length is never exceeded 579 what = what[:len(pat)] 580 581 for i, x := range what { 582 y := pat[i] 583 if x != y && y != wildcard { 584 return false 585 } 586 } 587 return true 588 } File: tu/filetypes_test.go 1 package main 2 3 import ( 4 "bytes" 5 "strconv" 6 "testing" 7 ) 8 9 func TestData(t *testing.T) { 10 t.Run(`could-be-anything constant`, func(t *testing.T) { 11 if len(hdrDispatch[cba]) != 0 { 12 const fs = `chosen constant %d collides with header entries` 13 t.Fatalf(fs, cba) 14 } 15 }) 16 17 for i, v := range hdrDispatch { 18 t.Run(`dispatch @ `+strconv.Itoa(i), func(t *testing.T) { 19 const fs = `expected leading byte to be %d, but got %d instead` 20 for _, e := range v { 21 if e.Header[0] != byte(i) { 22 t.Fatalf(fs, i, e.Header[0]) 23 return 24 } 25 } 26 }) 27 } 28 } 29 30 func TestCheckDoc(t *testing.T) { 31 const ( 32 lf = "\n" 33 crlf = "\r\n" 34 tab = "\t" 35 xmlIntro = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` 36 ) 37 38 tests := []struct { 39 Input string 40 Expected string 41 }{ 42 {``, ``}, 43 {`{"abc":123}`, json}, 44 {`[` + lf + ` {"abc":123}`, json}, 45 {`[` + lf + ` {"abc":123}`, json}, 46 {`[` + crlf + tab + `{"abc":123}`, json}, 47 48 {``, ``}, 49 {`<?xml?>`, xml}, 50 {`<?xml?><records>`, xml}, 51 {`<?xml?>` + lf + `<records>`, xml}, 52 {`<?xml?><svg>`, svg}, 53 {`<?xml?>` + crlf + `<svg>`, svg}, 54 {xmlIntro + lf + `<svg`, svg}, 55 {xmlIntro + crlf + `<svg`, svg}, 56 } 57 58 for _, tc := range tests { 59 t.Run(tc.Input, func(t *testing.T) { 60 res, _ := checkDoc([]byte(tc.Input)) 61 if res != tc.Expected { 62 t.Fatalf(`got %v, expected %v instead`, res, tc.Expected) 63 } 64 }) 65 } 66 } 67 68 func TestHasPrefixPattern(t *testing.T) { 69 var ( 70 data = []byte{ 71 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 72 } 73 pat = []byte{ 74 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 75 } 76 ) 77 78 if !hasPrefixPattern(data, pat, cba) { 79 t.Fatal(`wildcard pattern not working`) 80 } 81 } 82 83 func BenchmarkHasPrefixMatch(b *testing.B) { 84 var ( 85 data = []byte{ 86 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 87 } 88 pat = []byte{ 89 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 90 } 91 ) 92 93 b.ReportAllocs() 94 b.ResetTimer() 95 96 for i := 0; i < b.N; i++ { 97 if !bytes.HasPrefix(data, pat) { 98 b.Fatal(`pattern was specifically chosen to match, but didn't`) 99 } 100 } 101 } 102 103 func BenchmarkHasPrefixPatternMatch(b *testing.B) { 104 var ( 105 data = []byte{ 106 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 107 } 108 pat = []byte{ 109 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 110 } 111 ) 112 113 b.ReportAllocs() 114 b.ResetTimer() 115 116 for i := 0; i < b.N; i++ { 117 if !hasPrefixPattern(data, pat, cba) { 118 b.Fatal(`pattern was specifically chosen to match, but didn't`) 119 } 120 } 121 } File: tu/fractions.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "go/ast" 8 "go/parser" 9 "go/token" 10 "io" 11 "math" 12 "math/big" 13 "strconv" 14 "strings" 15 ) 16 17 func fractions(w *bufio.Writer, r io.Reader, args []string) error { 18 for _, src := range args { 19 expr, err := parser.ParseExpr(src) 20 if err != nil { 21 return err 22 } 23 24 res, err := fracEval{}.eval(expr) 25 if err != nil { 26 return err 27 } 28 29 w.WriteString(res.String()) 30 if err := endLine(w); err != nil { 31 return err 32 } 33 } 34 return nil 35 } 36 37 // fracConstants can't use integer values, as some are too big to fit 64 bits 38 var fracConstants = map[string]string{ 39 `k`: `1000`, 40 `kilo`: `1000`, 41 `mega`: `1000000`, 42 `giga`: `1000000000`, 43 `tera`: `1000000000000`, 44 `exa`: `1000000000000000`, 45 46 `kb`: `1024`, 47 `mb`: `1048576`, 48 `gb`: `1073741824`, 49 `tb`: `1099511627776`, 50 `eb`: `1125899906842624`, 51 52 `kib`: `1024`, 53 `mib`: `1048576`, 54 `gib`: `1073741824`, 55 `tib`: `1099511627776`, 56 `eib`: `1125899906842624`, 57 58 `mol`: `602214076000000000000000`, 59 `mole`: `602214076000000000000000`, 60 } 61 62 var fracUnary = map[string]func(x *big.Rat) (*big.Rat, error){ 63 `+`: func(x *big.Rat) (*big.Rat, error) { return x, nil }, 64 `-`: func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil }, 65 `abs`: func(x *big.Rat) (*big.Rat, error) { return x.Abs(x), nil }, 66 `bits`: fracBits, 67 `cube`: fracCube, 68 `d`: fracDenom, 69 `den`: fracDenom, 70 `denom`: fracDenom, 71 `denominator`: fracDenom, 72 `digits`: fracDigits, 73 `fac`: fracFactorial, 74 `fact`: fracFactorial, 75 `factorial`: fracFactorial, 76 `inv`: func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil }, 77 `inverse`: func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil }, 78 `invert`: func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil }, 79 `n`: fracNumer, 80 `neg`: func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil }, 81 `negate`: func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil }, 82 `num`: fracNumer, 83 `numer`: fracNumer, 84 `numerator`: fracNumer, 85 `pow2`: fracPower2, 86 `power2`: fracPower2, 87 `pow10`: fracPower10, 88 `power10`: fracPower10, 89 `sgn`: fracSign, 90 `sign`: fracSign, 91 `sqr`: func(x *big.Rat) (*big.Rat, error) { return x.Mul(x, x), nil }, 92 `square`: func(x *big.Rat) (*big.Rat, error) { return x.Mul(x, x), nil }, 93 } 94 95 var fracBinary = map[string]func(x, y *big.Rat) (*big.Rat, error){ 96 `+`: func(x, y *big.Rat) (*big.Rat, error) { return x.Add(x, y), nil }, 97 `-`: func(x, y *big.Rat) (*big.Rat, error) { return x.Sub(x, y), nil }, 98 `*`: func(x, y *big.Rat) (*big.Rat, error) { return x.Mul(x, y), nil }, 99 `/`: fracDiv, 100 `%`: fracRem, 101 102 `c`: fracChoose, 103 `choose`: fracChoose, 104 `comb`: fracChoose, 105 `combin`: fracChoose, 106 `combinations`: fracChoose, 107 `p`: fracPermute, 108 `per`: fracPermute, 109 `perm`: fracPermute, 110 `permut`: fracPermute, 111 `permuts`: fracPermute, 112 `permutations`: fracPermute, 113 `pow`: fracPower, 114 `power`: fracPower, 115 } 116 117 func fracNumer(x *big.Rat) (*big.Rat, error) { 118 return big.NewRat(0, 1).SetFrac(x.Num(), big.NewInt(1)), nil 119 } 120 121 func fracDenom(x *big.Rat) (*big.Rat, error) { 122 return big.NewRat(0, 1).SetFrac(x.Denom(), big.NewInt(1)), nil 123 } 124 125 func fracBits(x *big.Rat) (*big.Rat, error) { 126 if !x.IsInt() || x.Sign() < 0 { 127 return nil, fmt.Errorf(`only non-negative integers are allowed`) 128 } 129 if x.Sign() == 0 { 130 return big.NewRat(1, 1), nil 131 } 132 return big.NewRat(int64(x.Num().BitLen()), 1), nil 133 } 134 135 func fracCube(x *big.Rat) (*big.Rat, error) { 136 cube := big.NewRat(1, 1) 137 cube = cube.Mul(cube, x) 138 cube = cube.Mul(cube, x) 139 cube = cube.Mul(cube, x) 140 return cube, nil 141 } 142 143 func fracDigits(x *big.Rat) (*big.Rat, error) { 144 if !x.IsInt() || x.Sign() < 0 { 145 return nil, fmt.Errorf(`only non-negative integers are allowed`) 146 } 147 if x.Sign() == 0 { 148 return big.NewRat(1, 1), nil 149 } 150 bits := int64(x.Num().BitLen()) 151 d := int64(math.Ceil(math.Log10(math.Exp2(float64(bits))))) 152 return big.NewRat(d, 1), nil 153 } 154 155 func fracDiv(x, y *big.Rat) (*big.Rat, error) { 156 if y.Sign() == 0 { 157 return nil, errors.New(`can't divide by zero`) 158 } 159 return x.Quo(x, y), nil 160 } 161 162 func fracRem(x, y *big.Rat) (*big.Rat, error) { 163 if y.Sign() == 0 { 164 return nil, errors.New(`can't divide by zero`) 165 } 166 167 if !x.IsInt() || !y.IsInt() { 168 return nil, errors.New(`remainder only supports integers`) 169 } 170 171 rem := big.NewInt(1) 172 rem = rem.Rem(x.Num(), y.Num()) 173 return big.NewRat(0, 1).SetFrac(rem, big.NewInt(1)), nil 174 } 175 176 func fracSign(x *big.Rat) (*big.Rat, error) { 177 return big.NewRat(int64(x.Sign()), 1), nil 178 } 179 180 func fracFactorial(x *big.Rat) (*big.Rat, error) { 181 if x.Sign() < 0 { 182 return nil, errors.New(`factorial isn't defined for negative numbers`) 183 } 184 185 if !x.IsInt() { 186 return nil, errors.New(`factorial is only defined for integer numbers`) 187 } 188 189 return fracIntFactorial(x.Num()), nil 190 } 191 192 func fracIntFactorial(n *big.Int) *big.Rat { 193 one := big.NewInt(1) 194 fac := big.NewInt(1) 195 for n.Sign() > 0 { 196 fac = fac.Mul(fac, n) 197 n = n.Sub(n, one) 198 } 199 return big.NewRat(0, 1).SetFrac(fac, big.NewInt(1)) 200 } 201 202 func fracChoose(n, k *big.Rat) (*big.Rat, error) { 203 v, err := fracPermute(n, k) 204 if err != nil { 205 return v, err 206 } 207 208 f, err := fracFactorial(k) 209 if err != nil { 210 return nil, err 211 } 212 213 if f.Sign() == 0 { 214 return nil, fmt.Errorf(`factorial mistakenly resulted in 0`) 215 } 216 return v.Quo(v, f), nil 217 } 218 219 func fracPermute(n, k *big.Rat) (*big.Rat, error) { 220 if !n.IsInt() || n.Sign() < 0 || !k.IsInt() || k.Sign() < 0 { 221 return nil, fmt.Errorf(`expected 2 non-negative integers`) 222 } 223 224 one := big.NewRat(1, 1) 225 perm := big.NewRat(1, 1) 226 // end = n - k + 1 227 end := big.NewRat(1, 1).Set(n) 228 end = end.Sub(end, k) 229 end = end.Add(end, one) 230 231 for v := big.NewRat(1, 1).Set(n); v.Cmp(end) >= 0; v = v.Sub(v, one) { 232 perm = perm.Mul(perm, v) 233 } 234 return perm, nil 235 } 236 237 func fracPower2(x *big.Rat) (*big.Rat, error) { 238 return fracPower(big.NewRat(2, 1), x) 239 } 240 241 func fracPower10(x *big.Rat) (*big.Rat, error) { 242 return fracPower(big.NewRat(10, 1), x) 243 } 244 245 func fracPower(x *big.Rat, y *big.Rat) (*big.Rat, error) { 246 if !y.IsInt() { 247 return nil, errors.New(`only integer exponents are supported`) 248 } 249 return fracIntPower(x, y.Num()) 250 } 251 252 func fracIntPower(x *big.Rat, y *big.Int) (*big.Rat, error) { 253 if x.Sign() == 0 && y.Sign() == 0 { 254 return nil, errors.New(`zero to the zero power isn't defined`) 255 } 256 257 if x.Sign() == 0 { 258 return big.NewRat(0, 1), nil 259 } 260 if y.Sign() == 0 { 261 return big.NewRat(1, 1), nil 262 } 263 264 return powFractionInPlace(x, y) 265 } 266 267 // powFractionInPlace calculates values in place: since bignums are pointers 268 // to their representations, this means the original values will change 269 func powFractionInPlace(x *big.Rat, y *big.Int) (*big.Rat, error) { 270 xsign := x.Sign() 271 ysign := y.Sign() 272 273 // 0 ** 0 is undefined 274 if xsign == 0 && ysign == 0 { 275 const msg = `0 to the 0 doesn't make sense` 276 return nil, errors.New(msg) 277 } 278 279 // otherwise x ** 0 is 1 280 if ysign == 0 { 281 return big.NewRat(1, 1), nil 282 } 283 284 // x ** (y < 0) is like (1/x) ** -y 285 if ysign < 0 { 286 inv := big.NewRat(1, 1).Inv(x) 287 neg := big.NewInt(1).Neg(y) 288 return powFractionInPlace(inv, neg) 289 } 290 291 // 0 ** (y > 0) is 0 292 if xsign == 0 { 293 return x, nil 294 } 295 296 // x ** 0 is 0 297 if ysign == 0 { 298 return big.NewRat(0, 1), nil 299 } 300 301 // x ** 1 is x 302 if y.IsInt64() && y.Int64() == 1 { 303 return x, nil 304 } 305 306 return _powFractionRec(x, y), nil 307 } 308 309 func _powFractionRec(x *big.Rat, y *big.Int) *big.Rat { 310 switch y.Sign() { 311 case -1: 312 return big.NewRat(0, 1) 313 case 0: 314 return big.NewRat(1, 1) 315 case 1: 316 if y.IsInt64() && y.Int64() == 1 { 317 return x 318 } 319 } 320 321 yhalf := big.NewInt(0) 322 oddrem := big.NewInt(0) 323 yhalf.QuoRem(y, big.NewInt(2), oddrem) 324 325 if oddrem.Sign() == 0 { 326 xsquare := big.NewRat(0, 1) 327 return _powFractionRec(xsquare.Mul(x, x), yhalf) 328 } 329 prevpow := _powFractionRec(x, y.Sub(y, big.NewInt(1))) 330 return prevpow.Mul(prevpow, x) 331 } 332 333 // fracEval is just an excuse to group all sorts of funcs to eval fractions 334 type fracEval struct{} 335 336 func (fe fracEval) eval(expr ast.Expr) (*big.Rat, error) { 337 switch expr := expr.(type) { 338 case *ast.BasicLit: 339 v, err := fe.literal(expr) 340 return v, fe.context(expr.Value, err) 341 case *ast.ParenExpr: 342 return fe.eval(expr.X) 343 case *ast.Ident: 344 v, err := fe.constant(expr) 345 return v, fe.context(expr.Name, err) 346 case *ast.UnaryExpr: 347 v, err := fe.unary(expr) 348 return v, fe.context(expr.Op.String(), err) 349 case *ast.BinaryExpr: 350 v, err := fe.binary(expr) 351 return v, fe.context(expr.Op.String(), err) 352 case *ast.SelectorExpr: 353 v, err := fe.selector(expr) 354 return v, fe.context(expr.Sel.Name, err) 355 case *ast.CallExpr: 356 return fe.call(expr) 357 default: 358 return nil, fmt.Errorf(`unsupported expression type %T`, expr) 359 } 360 } 361 362 func (fe fracEval) context(s string, err error) error { 363 if err == nil { 364 return nil 365 } 366 return fmt.Errorf(`%s: %w`, s, err) 367 } 368 369 func (fe fracEval) normalize(s string) string { 370 s = strings.TrimSpace(s) 371 s = strings.ReplaceAll(s, `_`, ``) 372 s = strings.ToLower(s) 373 return s 374 } 375 376 func (fe fracEval) constant(expr *ast.Ident) (*big.Rat, error) { 377 k := fe.normalize(expr.Name) 378 if len(k) == 0 { 379 return nil, errors.New(`empty-name value`) 380 } 381 382 if s, ok := fracConstants[k]; ok { 383 if num, ok := big.NewInt(0).SetString(s, 10); ok { 384 return big.NewRat(0, 1).SetFrac(num, big.NewInt(1)), nil 385 } 386 return nil, errors.New(`integer literal not set (?)`) 387 } 388 return nil, errors.New(`value named ` + k + ` not found`) 389 } 390 391 func (fe fracEval) literal(expr *ast.BasicLit) (*big.Rat, error) { 392 switch expr.Kind { 393 case token.INT: 394 s := strings.ReplaceAll(expr.Value, `_`, ``) 395 if num, ok := big.NewInt(0).SetString(s, 10); ok { 396 return big.NewRat(0, 1).SetFrac(num, big.NewInt(1)), nil 397 } 398 return nil, errors.New(`integer literal not set (?)`) 399 400 case token.FLOAT: 401 return fe.parse(expr.Value) 402 403 case token.IMAG: 404 return fe.parse(expr.Value) 405 406 default: 407 return nil, fmt.Errorf(`unknown literal kind %s`, expr.Kind.String()) 408 } 409 } 410 411 func (fe fracEval) parse(s string) (*big.Rat, error) { 412 s = strings.TrimSuffix(s, `i`) 413 s = strings.ReplaceAll(s, `_`, ``) 414 den := math.Pow10(countDecimals(s)) 415 s = strings.Replace(s, `.`, ``, 1) 416 f, err := strconv.ParseFloat(s, 64) 417 if err != nil { 418 return nil, err 419 } 420 return big.NewRat(int64(f), int64(den)), nil 421 } 422 423 func (fe fracEval) unary(expr *ast.UnaryExpr) (*big.Rat, error) { 424 return fe.call1(expr.Op.String(), expr.X) 425 } 426 427 func (fe fracEval) binary(expr *ast.BinaryExpr) (*big.Rat, error) { 428 return fe.call2(expr.Op.String(), expr.X, expr.Y) 429 } 430 431 func (fe fracEval) selector(expr *ast.SelectorExpr) (*big.Rat, error) { 432 return fe.call1(expr.Sel.Name, expr.X) 433 } 434 435 func (fe fracEval) call(expr *ast.CallExpr) (*big.Rat, error) { 436 switch len(expr.Args) { 437 case 1: 438 switch f := expr.Fun.(type) { 439 case *ast.Ident: 440 v, err := fe.call1(f.Name, expr.Args[0]) 441 return v, fe.context(f.Name, err) 442 case *ast.SelectorExpr: 443 v, err := fe.call2(f.Sel.Name, f.X, expr.Args[0]) 444 return v, fe.context(f.Sel.Name, err) 445 default: 446 return nil, fmt.Errorf(`unsupported expression type %T`, f) 447 } 448 449 case 2: 450 switch f := expr.Fun.(type) { 451 case *ast.Ident: 452 v, err := fe.call2(f.Name, expr.Args[0], expr.Args[1]) 453 return v, fe.context(f.Name, err) 454 default: 455 return nil, fmt.Errorf(`unsupported expression type %T`, f) 456 } 457 458 default: 459 var name string 460 switch f := expr.Fun.(type) { 461 case *ast.Ident: 462 name = f.Name 463 case *ast.SelectorExpr: 464 name = f.Sel.Name 465 default: 466 return nil, fmt.Errorf(`unsupported expression type %T`, f) 467 } 468 469 const msg = `only functions with 1 or 2 arguments are supported` 470 return nil, fe.context(name, errors.New(msg)) 471 } 472 } 473 474 func (fe fracEval) call1(name string, arg ast.Expr) (*big.Rat, error) { 475 x, err := fe.eval(arg) 476 if err != nil { 477 return nil, err 478 } 479 480 k := fe.normalize(name) 481 if len(k) == 0 { 482 return nil, errors.New(`empty-name function`) 483 } 484 485 if f, ok := fracUnary[k]; ok { 486 return f(x) 487 } 488 return nil, errors.New(`function named ` + k + ` not found`) 489 } 490 491 func (fe fracEval) call2(name string, arg1, arg2 ast.Expr) (*big.Rat, error) { 492 x, err := fe.eval(arg1) 493 if err != nil { 494 return nil, err 495 } 496 y, err := fe.eval(arg2) 497 if err != nil { 498 return nil, err 499 } 500 501 k := fe.normalize(name) 502 if len(k) == 0 { 503 return nil, errors.New(`empty-name function`) 504 } 505 506 if f, ok := fracBinary[k]; ok { 507 return f(x, y) 508 } 509 return nil, errors.New(`function named ` + k + ` not found`) 510 } File: tu/go.mod 1 module tt 2 3 go 1.18 File: tu/id3pic.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/binary" 7 "errors" 8 "io" 9 "mime" 10 ) 11 12 // id3Picture isolates the thumbnail bytes from the id3/mp3 stream given 13 func id3Picture(w *bufio.Writer, r io.Reader) error { 14 _, err := pickID3Picture(w, r) 15 if err == io.EOF { 16 return errors.New(`no thumbnail found`) 17 } 18 return err 19 } 20 21 // pickID3Picture isolates the thumbnail bytes from the id3/mp3 stream given, 22 // also returning the detected MIME-type 23 func pickID3Picture(w io.Writer, r io.Reader) (mimetype string, err error) { 24 // http://www.unixgods.org/Ruby/ID3/docs/ID3_comparison.html 25 26 br := bufio.NewReader(r) 27 28 for { 29 b, err := br.ReadByte() 30 if err != nil { 31 return ``, err 32 } 33 34 switch b { 35 case 'A': 36 // check for an `APIC` section 37 ok, err := matchBytes(br, []byte{'P', 'I', 'C'}) 38 if err != nil { 39 return ``, err 40 } 41 if ok { 42 return handleAPIC(w, br) 43 } 44 45 case 'P': 46 // check for a `PIC` section 47 ok, err := matchBytes(br, []byte{'I', 'C'}) 48 if err != nil { 49 return ``, err 50 } 51 if ok { 52 return handlePIC(w, br) 53 } 54 } 55 } 56 } 57 58 // matchBytes is used by func id3Picture to skip right past the byte-sequence 59 // given 60 func matchBytes(br *bufio.Reader, data []byte) (bool, error) { 61 cur := data[:] 62 63 for { 64 if len(cur) == 0 { 65 return true, nil 66 } 67 68 b, err := br.ReadByte() 69 if err != nil { 70 return false, err 71 } 72 73 if b != cur[0] { 74 err = br.UnreadByte() 75 return false, err 76 } 77 78 cur = cur[1:] 79 } 80 } 81 82 // handleAPIC is used by func id3Picture 83 func handleAPIC(w io.Writer, br *bufio.Reader) (mimeType string, err error) { 84 // section-size seems stored as 4 little-endian bytes 85 var size uint32 86 err = binary.Read(br, binary.LittleEndian, &size) 87 if err != nil { 88 const msg = `failed to detect thumbnail-payload size` 89 return ``, errors.New(msg) 90 } 91 92 kind, n, err := getThumbnailTypeAPIC(br) 93 if err != nil { 94 const msg = `failed to sync to start of thumbnail data` 95 return ``, errors.New(msg) 96 } 97 98 mimeType = string(kind) 99 size -= uint32(n) 100 101 for { 102 b, err := br.ReadByte() 103 if err != nil { 104 const msg = `failed to sync to comment before thumbnail` 105 return mimeType, errors.New(msg) 106 } 107 size-- 108 109 if b == 0 { 110 // some podcasts add an extra null-terminated `image` tag, right 111 // after misidentifying the MIME-type of the picture to follow 112 junk := []byte{'i', 'm', 'a', 'g', 'e', 0} 113 extra, err := br.Peek(len(junk)) 114 if err != nil { 115 return mimeType, err 116 } 117 if bytes.Equal(extra, junk) { 118 br.Discard(len(junk)) 119 } 120 121 _, err = io.Copy(w, io.LimitReader(br, int64(size))) 122 return mimeType, adaptWriteError(err) 123 } 124 } 125 } 126 127 // handlePIC is used by func id3Picture 128 func handlePIC(w io.Writer, br *bufio.Reader) (mimeType string, err error) { 129 // http://www.unixgods.org/Ruby/ID3/docs/id3v2-00.html#PIC 130 131 var buf [8]byte 132 n, err := br.Read(buf[:3]) 133 if err != nil || n != 3 { 134 const msg = `failed to detect thumbnail-payload size` 135 return ``, errors.New(msg) 136 } 137 138 // thumbnail-payload-size seems stored as 3 big-endian bytes 139 var size uint32 140 size += 256 * 256 * uint32(buf[0]) 141 size += 256 * uint32(buf[1]) 142 size += uint32(buf[2]) 143 144 // skip the text encoding 145 n, err = br.Read(buf[:5]) 146 if err != nil || n != 5 { 147 const msg = `failed to read thumbnail-payload type` 148 return ``, errors.New(msg) 149 } 150 151 // skip a null-delimited string 152 _, err = br.ReadString(0) 153 if err != nil { 154 const msg = `failed to read thumbnail-payload description` 155 return ``, errors.New(msg) 156 } 157 158 var ext [4]byte 159 ext[0] = '.' 160 ext[1] = buf[1] 161 ext[2] = buf[2] 162 ext[3] = buf[3] 163 164 // use made-up file-extension to detect MIME-type, then copy all 165 // thumbnail bytes 166 mimeType = mime.TypeByExtension(string(ext[:])) 167 _, err = io.Copy(w, io.LimitReader(br, int64(size))) 168 return mimeType, adaptWriteError(err) 169 } 170 171 // getThumbnailTypeAPIC is used by func handleAPIC 172 func getThumbnailTypeAPIC(br *bufio.Reader) ([]byte, int, error) { 173 var kind []byte 174 n, err := meetBytes(br, []byte(`image/`)) 175 if err != nil { 176 return nil, n, err 177 } 178 179 kind = append(kind, `image/`...) 180 for { 181 b, err := br.ReadByte() 182 if err != nil { 183 return kind, n, err 184 } 185 n++ 186 187 if b == 0 { 188 return kind, n, nil 189 } 190 kind = append(kind, b) 191 } 192 } 193 194 // meetBytes is used by func getThumbnailTypeAPIC to skip right past the 195 // byte-sequence given 196 func meetBytes(br *bufio.Reader, data []byte) (int, error) { 197 n := 0 198 cur := data[:] 199 200 for { 201 if len(cur) == 0 { 202 return n, nil 203 } 204 205 b, err := br.ReadByte() 206 if err != nil { 207 return n, err 208 } 209 n++ 210 211 if b == cur[0] { 212 cur = cur[1:] 213 } else { 214 cur = data 215 } 216 } 217 } File: tu/info.txt 1 tu [tool...] [arguments...] 2 3 Tiny and Useful tools is a collection of various command-line tools: most of 4 these are line-oriented text-processors, but others even work with non-text 5 bytes in general. 6 7 Tool names have various aliases, and you can use any dashes/minuses and/or 8 underscores freely in the names, since they're ignored during tool look-up; 9 the only exceptions to that are `--` and `-`, which are hard-coded aliases 10 for the `compose` tool. 11 12 Tool `lines` is one of the exceptions, and allows any number of filepaths, 13 with a single dash meaning standard-input. It even allows using single 14 dashes multiple times: in that case, standard-input is only read once and 15 kept for later (re)uses. File: tu/io.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/csv" 7 "errors" 8 "fmt" 9 "io" 10 "os" 11 "strconv" 12 ) 13 14 const ( 15 kb = 1024 16 mb = 1024 * kb 17 gb = 1024 * mb 18 19 bufferSize = 16 * kb 20 ) 21 22 // utf8BOM is used by older windows apps, but it's useless, as UTF-8 has 23 // only 1 guaranteed byte-order 24 var utf8BOM = []byte{0xef, 0xbb, 0xbf} 25 26 // errNoMoreOutput isn't meant to ever show, and is just an excuse to quickly 27 // quit the app successfully 28 var errNoMoreOutput = errors.New(`no more output`) 29 30 func loopCSV(r io.Reader, fn func(row []string) error) error { 31 rr := newReaderCSV(r) 32 rr.ReuseRecord = true 33 34 for { 35 row, err := rr.Read() 36 if err == io.EOF { 37 return nil 38 } 39 40 if err != nil { 41 return err 42 } 43 44 if err := fn(row); err != nil { 45 return err 46 } 47 } 48 } 49 50 type chunkHandler func(pos int, chunk []byte) (keepGoing bool) 51 52 func loopChunksBackward(rs io.ReadSeeker, buf []byte, fn chunkHandler) error { 53 if len(buf) == 0 { 54 return errors.New(`internal error: read-buffer given is empty`) 55 } 56 57 pos, err := rs.Seek(0, io.SeekEnd) 58 if err == io.EOF { 59 return nil 60 } 61 if err != nil { 62 return err 63 } 64 65 if pos < 1 { 66 fn(0, nil) 67 return nil 68 } 69 70 // first (backward-chunk) read may be smaller than the others 71 chunkSize := pos % int64(len(buf)) 72 if chunkSize == 0 { 73 chunkSize = int64(len(buf)) 74 } 75 76 for { 77 where := pos - chunkSize 78 if where < 0 { 79 where = 0 80 } 81 82 // ensure full-buffer reads for all later (backward-chunk) reads 83 chunkSize = int64(len(buf)) 84 85 pos, err = rs.Seek(where, io.SeekStart) 86 if err != nil { 87 return err 88 } 89 90 read, err := rs.Read(buf[:chunkSize]) 91 if err == io.EOF { 92 return nil 93 } 94 if err != nil { 95 return err 96 } 97 98 if !fn(int(pos), buf[:read]) { 99 _, err = rs.Seek(pos, io.SeekStart) 100 return err 101 } 102 103 if where < 1 { 104 _, err = rs.Seek(0, io.SeekStart) 105 return err 106 } 107 } 108 } 109 110 func loopLines(r io.Reader, fn func(line []byte) error) error { 111 sc := bufio.NewScanner(r) 112 sc.Buffer(nil, 8*gb) 113 114 for i := 0; sc.Scan(); i++ { 115 s := sc.Bytes() 116 if i == 0 { 117 s = bytes.TrimPrefix(s, utf8BOM) 118 } 119 120 if err := fn(s); err != nil { 121 return err 122 } 123 } 124 125 return sc.Err() 126 } 127 128 func loopLinesString(r io.Reader, fn func(line string) error) error { 129 sc := bufio.NewScanner(r) 130 sc.Buffer(nil, 8*gb) 131 132 for i := 0; sc.Scan(); i++ { 133 s := sc.Text() 134 if i == 0 && startsWith(s, utf8BOM) { 135 s = s[len(utf8BOM):] 136 } 137 138 if err := fn(s); err != nil { 139 return err 140 } 141 } 142 143 return sc.Err() 144 } 145 146 func loadSetPair(paths []string, r io.Reader) ([]string, []string, error) { 147 if len(paths) == 1 { 148 paths = []string{paths[0], `-`} 149 } 150 if len(paths) != 2 { 151 return nil, nil, fmt.Errorf(`expected 2 args, but was given %d`, len(paths)) 152 } 153 154 if paths[0] == `-` && paths[1] == `-` { 155 u, err := loadUniqueLines(r) 156 return u, u, err 157 } 158 159 var x, y []string 160 161 err := handleNamedInput(paths[0], r, func(r io.Reader) error { 162 u, err := loadUniqueLines(r) 163 x = u 164 return err 165 }) 166 167 if err != nil { 168 return x, y, err 169 } 170 171 err = handleNamedInput(paths[1], r, func(r io.Reader) error { 172 u, err := loadUniqueLines(r) 173 y = u 174 return err 175 }) 176 177 return x, y, err 178 } 179 180 func loadUniqueLines(r io.Reader) ([]string, error) { 181 var lines []string 182 got := make(map[string]struct{}) 183 184 err := loopLines(r, func(line []byte) error { 185 s := string(line) 186 if _, ok := got[s]; ok { 187 return nil 188 } 189 190 got[s] = struct{}{} 191 lines = append(lines, s) 192 return nil 193 }) 194 195 return lines, err 196 } 197 198 type namedInputHandler func(path string, r io.Reader) error 199 200 func handleNamedInputs(paths []string, r io.Reader, handle namedInputHandler) error { 201 if len(paths) == 0 { 202 return handle(`-`, os.Stdin) 203 } 204 205 // find if single-dash was given more than once 206 dashes := 0 207 reuseStdin := false 208 for _, s := range paths { 209 if s == `-` { 210 dashes++ 211 } 212 if dashes > 1 { 213 // counting 2 dashes is enough 214 reuseStdin = true 215 break 216 } 217 } 218 219 var stdin []byte 220 gotStdin := false 221 222 for _, s := range paths { 223 if s == `-` { 224 // handle multiple single-dashes by slurping stdin the first 225 // time, then `replaying` for every `dash` 226 if reuseStdin { 227 if !gotStdin { 228 gotStdin = true 229 b, err := io.ReadAll(os.Stdin) 230 if err != nil { 231 return err 232 } 233 stdin = b 234 } 235 236 if err := handle(s, bytes.NewReader(stdin)); err != nil { 237 return err 238 } 239 continue 240 } 241 242 // no need to slurp stdin, if it's only being used once 243 if err := handle(s, r); err != nil { 244 return err 245 } 246 continue 247 } 248 249 // handle actual files 250 err := handleFile(s, func(r io.Reader) error { return handle(s, r) }) 251 if err != nil { 252 return err 253 } 254 } 255 256 return nil 257 } 258 259 func handleNamedInput(path string, r io.Reader, handle func(r io.Reader) error) error { 260 if path == `-` { 261 return handle(r) 262 } 263 264 // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) { 265 // resp, err := http.Get(path) 266 // if err != nil { 267 // return err 268 // } 269 // defer resp.Body.Close() 270 // return handle(resp.Body) 271 // } 272 273 return handleFile(path, handle) 274 } 275 276 func handleFile(path string, handle func(r io.Reader) error) error { 277 f, err := os.Open(path) 278 if err != nil { 279 return errors.New(`can't open file named ` + path) 280 } 281 defer f.Close() 282 return handle(f) 283 } 284 285 func endLine(w io.Writer) error { 286 _, err := w.Write([]byte{'\n'}) 287 return adaptWriteError(err) 288 } 289 290 // func adaptReadError(err error) error { 291 // if err == nil || err == io.EOF { 292 // return nil 293 // } 294 // return err 295 // } 296 297 func adaptWriteError(err error) error { 298 if err == nil { 299 return nil 300 } 301 return errNoMoreOutput 302 } 303 304 func copyBytes(w io.Writer, r io.Reader) error { 305 _, err := io.Copy(w, r) 306 return adaptWriteError(err) 307 } 308 309 func copyLines(w *bufio.Writer, r io.Reader) error { 310 return loopLines(r, func(line []byte) error { 311 w.Write(line) 312 return endLine(w) 313 }) 314 } 315 316 func writeInt64(w io.Writer, n int64) error { 317 var buf [32]byte 318 _, err := w.Write(strconv.AppendInt(buf[:0], n, 10)) 319 return adaptWriteError(err) 320 } 321 322 func writeLines(w *bufio.Writer, lines []string) error { 323 for _, s := range lines { 324 w.WriteString(s) 325 if err := endLine(w); err != nil { 326 return err 327 } 328 } 329 330 return nil 331 } 332 333 func writeSpaces(w *bufio.Writer, n int) { 334 const ( 335 spaces16 = ` ` 336 spaces32 = spaces16 + spaces16 337 spaces = spaces32 + spaces32 338 ) 339 340 if n < 1 { 341 return 342 } 343 344 for n >= len(spaces) { 345 w.WriteString(spaces) 346 n -= len(spaces) 347 } 348 w.WriteString(spaces[:n]) 349 } 350 351 func newReaderCSV(r io.Reader) *csv.Reader { 352 rr := csv.NewReader(r) 353 rr.Comma = ',' 354 rr.FieldsPerRecord = -1 355 rr.LazyQuotes = true 356 return rr 357 } 358 359 func newWriterCSV(w io.Writer) *csv.Writer { 360 rw := csv.NewWriter(w) 361 rw.Comma = ',' 362 rw.UseCRLF = false 363 return rw 364 } 365 366 // readPairFunc narrows source-code lines below 367 type readPairFunc func(*bufio.Reader) (byte, byte, error) 368 369 // readBytePairBE gets you a pair of bytes in big-endian (original) order 370 func readBytePairBE(br *bufio.Reader) (byte, byte, error) { 371 a, err := br.ReadByte() 372 if err != nil { 373 return a, 0, err 374 } 375 b, err := br.ReadByte() 376 return a, b, err 377 } 378 379 // readBytePairLE gets you a pair of bytes in little-endian order 380 func readBytePairLE(br *bufio.Reader) (byte, byte, error) { 381 a, b, err := readBytePairBE(br) 382 return b, a, err 383 } File: tu/json0.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "io" 8 "strconv" 9 ) 10 11 // linePosError is a more descriptive kind of error, showing the source of 12 // the input-related problem, as 1-based a line/pos number pair in front 13 // of the error message 14 type linePosError struct { 15 // line is the 1-based line count from the input 16 line int 17 18 // pos is the 1-based `horizontal` position in its line 19 pos int 20 21 // err is the error message to `decorate` with the position info 22 err error 23 } 24 25 // Error satisfies the error interface 26 func (lpe linePosError) Error() string { 27 where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos) 28 return where + `: ` + lpe.err.Error() 29 } 30 31 var ( 32 errCommentEarlyEnd = errors.New(`unexpected early-end of comment`) 33 errInputEarlyEnd = errors.New(`expected end of input data`) 34 errInvalidComment = errors.New(`expected / or *`) 35 errInvalidHex = errors.New(`expected a base-16 digit`) 36 errInvalidToken = errors.New(`invalid JSON token`) 37 errNoDigits = errors.New(`expected numeric digits`) 38 errNoStringQuote = errors.New(`expected " or '`) 39 errNoArrayComma = errors.New(`missing comma between array values`) 40 errNoObjectComma = errors.New(`missing comma between key-value pairs`) 41 errStringEarlyEnd = errors.New(`unexpected early-end of string`) 42 errExtraBytes = errors.New(`unexpected extra input bytes`) 43 44 // errNoMoreOutput is a generic dummy output-error, which is meant to be 45 // ultimately ignored, being just an excuse to quit the app immediately 46 // and successfully 47 // errNoMoreOutput = errors.New(`no more output`) 48 ) 49 50 // isIdentifier improves control-flow of func jsonReader.key, when it handles 51 // unquoted object keys 52 var isIdentifier = [256]bool{ 53 '_': true, 54 55 '0': true, '1': true, '2': true, '3': true, '4': true, 56 '5': true, '6': true, '7': true, '8': true, '9': true, 57 58 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 59 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 60 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 61 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 62 'Y': true, 'Z': true, 63 64 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 65 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 66 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 67 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 68 'y': true, 'z': true, 69 } 70 71 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not 72 // being 0, and normalizes letter-case for the hex letters 73 var matchHex = [256]byte{ 74 '0': '0', '1': '1', '2': '2', '3': '3', '4': '4', 75 '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 76 'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 77 'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F', 78 } 79 80 // escapedStringBytes helps func stringValue treat all string bytes quickly 81 // and correctly, using their officially-supported JSON escape sequences 82 // 83 // https://www.rfc-editor.org/rfc/rfc8259#section-7 84 var escapedStringBytes = [256][]byte{ 85 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 86 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 87 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 88 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 89 {'\\', 'b'}, {'\\', 't'}, 90 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 91 {'\\', 'f'}, {'\\', 'r'}, 92 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 93 {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 94 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 95 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 96 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 97 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 98 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 99 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 100 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 101 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 102 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 103 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 104 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 105 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 106 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 107 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 108 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 109 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 110 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 111 {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119}, 112 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 113 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 114 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 115 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 116 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 117 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 118 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 119 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 120 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 121 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 122 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 123 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 124 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 125 {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231}, 126 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 127 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 128 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 129 } 130 131 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON, except for an 132 // extra/single line-feed at the end of the output 133 func json0(w *bufio.Writer, r io.Reader) error { 134 br := bufio.NewReader(r) 135 jr := jsonReader{br, 1, 1} 136 if err := jr.run(w); err != nil { 137 return err 138 } 139 return endLine(w) 140 } 141 142 // jsonl converts lines, each with JSON/pseudo-JSON data, into a (valid) 143 // minimal JSON array 144 func jsonl(w *bufio.Writer, r io.Reader) error { 145 i := 0 146 147 err := loopLines(r, func(line []byte) error { 148 if i == 0 { 149 w.WriteByte('[') 150 } else { 151 w.WriteByte(',') 152 } 153 i++ 154 155 br := bufio.NewReader(bytes.NewReader(line)) 156 jr := jsonReader{br, 1, 1} 157 // make errors refer to the right line number 158 jr.line = i + 1 159 return jr.run(w) 160 }) 161 162 if err != nil { 163 return err 164 } 165 166 if i > 0 { 167 w.WriteByte(']') 168 } 169 return endLine(w) 170 } 171 172 // jsonReader reads data via a buffer, keeping track of the input position: 173 // this in turn allows showing much more useful errors, when these happen 174 type jsonReader struct { 175 // r is the actual reader 176 r *bufio.Reader 177 178 // line is the 1-based line-counter for input bytes, and gives errors 179 // useful position info 180 line int 181 182 // pos is the 1-based `horizontal` position in its line, and gives 183 // errors useful position info 184 pos int 185 } 186 187 // improveError makes any error more useful, by giving it info about the 188 // current input-position, as a 1-based line/within-line-position pair 189 func (jr jsonReader) improveError(err error) error { 190 if _, ok := err.(linePosError); ok { 191 return err 192 } 193 194 if err == io.EOF { 195 return linePosError{jr.line, jr.pos, errInputEarlyEnd} 196 } 197 if err != nil { 198 return linePosError{jr.line, jr.pos, err} 199 } 200 return nil 201 } 202 203 // run does all the work for func json0, and each input line's work for func 204 // jsonl 205 func (jr *jsonReader) run(w *bufio.Writer) error { 206 // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order 207 // mark) gives no useful info if present, as UTF-8 leaves no ambiguity 208 // about byte-order by design 209 jr.skipUTF8BOM() 210 211 // ignore leading whitespace and/or comments 212 if err := jr.seekNext(); err != nil { 213 return err 214 } 215 216 // handle a single top-level JSON value 217 if err := jr.value(w); err != nil { 218 return err 219 } 220 221 // ignore trailing whitespace and/or comments 222 if err := jr.seekNext(); err != nil { 223 return err 224 } 225 226 // beyond trailing whitespace and/or comments, any more bytes 227 // make the whole input data invalid JSON 228 if _, ok := jr.peekByte(); ok { 229 return jr.improveError(errExtraBytes) 230 } 231 return nil 232 } 233 234 // demandSyntax fails with an error when the next byte isn't the one given; 235 // when it is, the byte is then read/skipped, and a nil error is returned 236 func (jr *jsonReader) demandSyntax(syntax byte) error { 237 chunk, err := jr.r.Peek(1) 238 if err == io.EOF { 239 return jr.improveError(errInputEarlyEnd) 240 } 241 if err != nil { 242 return jr.improveError(err) 243 } 244 245 if len(chunk) < 1 || chunk[0] != syntax { 246 msg := `expected ` + string(rune(syntax)) 247 return jr.improveError(errors.New(msg)) 248 } 249 250 jr.readByte() 251 return nil 252 } 253 254 // updatePosInfo does what it says, given the byte just read separately 255 func (jr *jsonReader) updatePosInfo(b byte) { 256 if b == '\n' { 257 jr.line += 1 258 jr.pos = 1 259 } else { 260 jr.pos++ 261 } 262 } 263 264 // peekByte simplifies control-flow for various other funcs 265 func (jr jsonReader) peekByte() (b byte, ok bool) { 266 chunk, err := jr.r.Peek(1) 267 if err == nil && len(chunk) >= 1 { 268 return chunk[0], true 269 } 270 return 0, false 271 } 272 273 // readByte does what it says, updating the reader's position info 274 func (jr *jsonReader) readByte() (b byte, err error) { 275 b, err = jr.r.ReadByte() 276 if err == nil { 277 jr.updatePosInfo(b) 278 return b, nil 279 } 280 return b, jr.improveError(err) 281 } 282 283 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols 284 // and comments, either single-line (starting with //) or general (starting 285 // with /* and ending with */) 286 func (jr *jsonReader) seekNext() error { 287 for { 288 b, ok := jr.peekByte() 289 if !ok { 290 return nil 291 } 292 293 // case ' ', '\t', '\f', '\v', '\r', '\n': 294 if b <= 32 { 295 // keep skipping whitespace bytes 296 b, _ := jr.readByte() 297 jr.updatePosInfo(b) 298 continue 299 } 300 301 if b != '/' { 302 // reached the next token 303 return nil 304 } 305 306 if err := jr.skipComment(); err != nil { 307 return err 308 } 309 310 // after comments, keep looking for more whitespace and/or comments 311 } 312 } 313 314 // skipComment helps func seekNext skip over comments, simplifying the latter 315 // func's control-flow 316 func (jr *jsonReader) skipComment() error { 317 err := jr.demandSyntax('/') 318 if err != nil { 319 return err 320 } 321 322 b, ok := jr.peekByte() 323 if !ok { 324 return jr.improveError(errInputEarlyEnd) 325 } 326 327 switch b { 328 case '/': 329 // handle single-line comments 330 return jr.skipLine() 331 332 case '*': 333 // handle (potentially) multi-line comments 334 return jr.skipGeneralComment() 335 336 default: 337 return jr.improveError(errInvalidComment) 338 } 339 } 340 341 // skipLine handles single-line comments for func skipComment 342 func (jr *jsonReader) skipLine() error { 343 for { 344 b, err := jr.r.ReadByte() 345 if err == io.EOF { 346 // end of input is fine in this case 347 return nil 348 } 349 if err != nil { 350 return err 351 } 352 353 jr.updatePosInfo(b) 354 if b == '\n' { 355 jr.line++ 356 return nil 357 } 358 } 359 } 360 361 // skipGeneralComment handles (potentially) multi-line comments for func 362 // skipComment 363 func (jr *jsonReader) skipGeneralComment() error { 364 var prev byte 365 for { 366 b, err := jr.readByte() 367 if err != nil { 368 return jr.improveError(errCommentEarlyEnd) 369 } 370 371 if prev == '*' && b == '/' { 372 return nil 373 } 374 if b == '\n' { 375 jr.line++ 376 } 377 prev = b 378 } 379 } 380 381 // skipUTF8BOM does what it says, if a UTF-8 BOM is present 382 func (jr *jsonReader) skipUTF8BOM() { 383 lead, err := jr.r.Peek(3) 384 if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) { 385 jr.readByte() 386 jr.readByte() 387 jr.readByte() 388 jr.pos += 3 389 } 390 } 391 392 // outputByte is a small wrapper on func WriteByte, which adapts any error 393 // into a custom dummy output-error, which is in turn meant to be ignored, 394 // being just an excuse to quit the app immediately and successfully 395 func outputByte(w *bufio.Writer, b byte) error { 396 err := w.WriteByte(b) 397 if err == nil { 398 return nil 399 } 400 return errNoMoreOutput 401 } 402 403 // array handles arrays for func value 404 func (jr *jsonReader) array(w *bufio.Writer) error { 405 if err := jr.demandSyntax('['); err != nil { 406 return err 407 } 408 w.WriteByte('[') 409 410 for n := 0; true; n++ { 411 // there may be whitespace/comments before the next comma 412 if err := jr.seekNext(); err != nil { 413 return err 414 } 415 416 // handle commas between values, as well as trailing ones 417 comma := false 418 b, _ := jr.peekByte() 419 if b == ',' { 420 jr.readByte() 421 comma = true 422 423 // there may be whitespace/comments before an ending ']' 424 if err := jr.seekNext(); err != nil { 425 return err 426 } 427 b, _ = jr.peekByte() 428 } 429 430 // handle end of array 431 if b == ']' { 432 jr.readByte() 433 w.WriteByte(']') 434 return nil 435 } 436 437 // don't forget commas between adjacent values 438 if n > 0 { 439 if !comma { 440 return errNoArrayComma 441 } 442 if err := outputByte(w, ','); err != nil { 443 return err 444 } 445 } 446 447 // handle the next value 448 if err := jr.seekNext(); err != nil { 449 return err 450 } 451 if err := jr.value(w); err != nil { 452 return err 453 } 454 } 455 456 // make the compiler happy 457 return nil 458 } 459 460 // digits helps various number-handling funcs do their job 461 func (jr *jsonReader) digits(w *bufio.Writer) error { 462 for n := 0; true; n++ { 463 b, _ := jr.peekByte() 464 465 // support `nice` long numbers by ignoring their underscores 466 if b == '_' { 467 jr.readByte() 468 continue 469 } 470 471 if '0' <= b && b <= '9' { 472 jr.readByte() 473 w.WriteByte(b) 474 continue 475 } 476 477 if n == 0 { 478 return errNoDigits 479 } 480 return nil 481 } 482 483 // make the compiler happy 484 return nil 485 } 486 487 // dot handles pseudo-JSON numbers which start with a decimal dot 488 func (jr *jsonReader) dot(w *bufio.Writer) error { 489 if err := jr.demandSyntax('.'); err != nil { 490 return err 491 } 492 w.Write([]byte{'0', '.'}) 493 return jr.digits(w) 494 } 495 496 // key is used by func object and generalizes func stringValue, by allowing 497 // unquoted object keys; it's not used anywhere else, as allowing unquoted 498 // string values is ambiguous with actual JSON-keyword values null, false, and 499 // true 500 func (jr *jsonReader) key(w *bufio.Writer) error { 501 quote, ok := jr.peekByte() 502 if quote == '"' || quote == '\'' { 503 return jr.stringValue(w) 504 } 505 if !ok { 506 return jr.improveError(errStringEarlyEnd) 507 } 508 509 w.WriteByte('"') 510 for { 511 if b, _ := jr.peekByte(); isIdentifier[b] { 512 jr.readByte() 513 w.WriteByte(b) 514 continue 515 } 516 517 w.WriteByte('"') 518 return nil 519 } 520 } 521 522 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes 523 // are unescaped ASCII symbols: this is a very common case for strings, and is 524 // almost always the case for object keys; returns whether it succeeded, so 525 // this func's caller knows knows if it needs to do anything, the slower way 526 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) { 527 chunk, _ := jr.r.Peek(64) 528 529 for i, b := range chunk { 530 if b < 32 || b > 127 || b == '\\' { 531 return false 532 } 533 if b != quote { 534 continue 535 } 536 537 // bulk-writing the chunk is this func's whole point 538 w.WriteByte('"') 539 w.Write(chunk[:i]) 540 w.WriteByte('"') 541 542 jr.r.Discard(i + 1) 543 return true 544 } 545 546 // maybe the inner-string is ok, but it's just longer than the chunk 547 return false 548 } 549 550 // keyword demands the exact keyword/string given to it 551 func (jr *jsonReader) keyword(w *bufio.Writer, kw []byte) error { 552 for rest := kw; len(rest) > 0; rest = rest[1:] { 553 b, err := jr.readByte() 554 if err == nil && b == rest[0] { 555 // keywords given to this func have no line-feeds 556 jr.pos++ 557 continue 558 } 559 560 msg := `expected JSON value ` + string(kw) 561 return jr.improveError(errors.New(msg)) 562 } 563 564 w.Write(kw) 565 return nil 566 } 567 568 // negative handles numbers starting with a negative sign for func value 569 func (jr *jsonReader) negative(w *bufio.Writer) error { 570 if err := jr.demandSyntax('-'); err != nil { 571 return err 572 } 573 574 w.WriteByte('-') 575 if b, _ := jr.peekByte(); b == '.' { 576 jr.readByte() 577 w.Write([]byte{'0', '.'}) 578 return jr.digits(w) 579 } 580 return jr.number(w) 581 } 582 583 // number handles numeric values/tokens, including invalid-JSON cases, such 584 // as values starting with a decimal dot 585 func (jr *jsonReader) number(w *bufio.Writer) error { 586 // handle integer digits 587 if err := jr.digits(w); err != nil { 588 return err 589 } 590 591 // handle optional decimal digits, starting with a leading dot 592 if b, _ := jr.peekByte(); b == '.' { 593 jr.readByte() 594 w.WriteByte('.') 595 return jr.digits(w) 596 } 597 return nil 598 } 599 600 // object handles objects for func value 601 func (jr *jsonReader) object(w *bufio.Writer) error { 602 if err := jr.demandSyntax('{'); err != nil { 603 return err 604 } 605 w.WriteByte('{') 606 607 for npairs := 0; true; npairs++ { 608 // there may be whitespace/comments before the next comma 609 if err := jr.seekNext(); err != nil { 610 return err 611 } 612 613 // handle commas between key-value pairs, as well as trailing ones 614 comma := false 615 b, _ := jr.peekByte() 616 if b == ',' { 617 jr.readByte() 618 comma = true 619 620 // there may be whitespace/comments before an ending '}' 621 if err := jr.seekNext(); err != nil { 622 return err 623 } 624 b, _ = jr.peekByte() 625 } 626 627 // handle end of object 628 if b == '}' { 629 jr.readByte() 630 w.WriteByte('}') 631 return nil 632 } 633 634 // don't forget commas between adjacent key-value pairs 635 if npairs > 0 { 636 if !comma { 637 return errNoObjectComma 638 } 639 if err := outputByte(w, ','); err != nil { 640 return err 641 } 642 } 643 644 // handle the next pair's key 645 if err := jr.seekNext(); err != nil { 646 return err 647 } 648 if err := jr.key(w); err != nil { 649 return err 650 } 651 652 // demand a colon right after the key 653 if err := jr.seekNext(); err != nil { 654 return err 655 } 656 if err := jr.demandSyntax(':'); err != nil { 657 return err 658 } 659 w.WriteByte(':') 660 661 // handle the next pair's value 662 if err := jr.seekNext(); err != nil { 663 return err 664 } 665 if err := jr.value(w); err != nil { 666 return err 667 } 668 } 669 670 // make the compiler happy 671 return nil 672 } 673 674 // positive handles numbers starting with a positive sign for func value 675 func (jr *jsonReader) positive(w *bufio.Writer) error { 676 if err := jr.demandSyntax('+'); err != nil { 677 return err 678 } 679 680 // valid JSON isn't supposed to have leading pluses on numbers, so 681 // emit nothing for it, unlike for negative numbers 682 683 if b, _ := jr.peekByte(); b == '.' { 684 jr.readByte() 685 w.Write([]byte{'0', '.'}) 686 return jr.digits(w) 687 } 688 return jr.number(w) 689 } 690 691 // stringValue handles strings for funcs value and key, and supports both 692 // single-quotes and double-quotes, always emitting the latter in the output, 693 // of course 694 func (jr *jsonReader) stringValue(w *bufio.Writer) error { 695 quote, ok := jr.peekByte() 696 if !ok || (quote != '"' && quote != '\'') { 697 return errNoStringQuote 698 } 699 700 jr.readByte() 701 // try the quicker all-unescaped-ASCII handler 702 if trySimpleInner(w, jr, quote) { 703 return nil 704 } 705 706 // it's a non-trivial inner-string, so handle it byte-by-byte 707 w.WriteByte('"') 708 escaped := false 709 710 for { 711 b, err := jr.r.ReadByte() 712 if err != nil { 713 if err == io.EOF { 714 return jr.improveError(errStringEarlyEnd) 715 } 716 return jr.improveError(err) 717 } 718 719 if !escaped { 720 if b == '\\' { 721 escaped = true 722 continue 723 } 724 725 // handle end of string 726 if b == quote { 727 return outputByte(w, '"') 728 } 729 730 w.Write(escapedStringBytes[b]) 731 jr.updatePosInfo(b) 732 continue 733 } 734 735 // handle escaped items 736 escaped = false 737 738 switch b { 739 case 'u': 740 // \u needs exactly 4 hex-digits to follow it 741 w.Write([]byte{'\\', 'u'}) 742 if err := copyHex(w, 4, jr); err != nil { 743 return jr.improveError(err) 744 } 745 746 case 'x': 747 // JSON only supports 4 escaped hex-digits, so pad the 2 748 // expected hex-digits with 2 zeros 749 w.Write([]byte{'\\', 'u', '0', '0'}) 750 if err := copyHex(w, 2, jr); err != nil { 751 return jr.improveError(err) 752 } 753 754 case 't', 'f', 'r', 'n', 'b', '\\', '"': 755 // handle valid-JSON escaped string sequences 756 w.WriteByte('\\') 757 w.WriteByte(b) 758 759 // case '\'': 760 // // escaped single-quotes aren't standard JSON, but they can 761 // // be handy when the input uses non-standard single-quoted 762 // // strings 763 // w.WriteByte('\'') 764 765 default: 766 // return jr.decorateError(unexpectedByte{b}) 767 w.Write(escapedStringBytes[b]) 768 } 769 } 770 } 771 772 // copyHex handles a run of hex-digits for func stringValue, starting right 773 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its 774 // errors with position info: that's up to the caller 775 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error { 776 for i := 0; i < n; i++ { 777 b, err := jr.r.ReadByte() 778 if err == io.EOF { 779 return errStringEarlyEnd 780 } 781 if err != nil { 782 return err 783 } 784 785 jr.updatePosInfo(b) 786 787 if b := matchHex[b]; b != 0 { 788 w.WriteByte(b) 789 continue 790 } 791 792 return errInvalidHex 793 } 794 795 return nil 796 } 797 798 // value is a generic JSON-token/value handler, which allows the recursive 799 // behavior to handle any kind of JSON/pseudo-JSON input 800 func (jr *jsonReader) value(w *bufio.Writer) error { 801 chunk, err := jr.r.Peek(1) 802 if err == nil && len(chunk) >= 1 { 803 return jr.dispatch(w, chunk[0]) 804 } 805 806 if err == io.EOF { 807 return jr.improveError(errInputEarlyEnd) 808 } 809 return jr.improveError(errInputEarlyEnd) 810 } 811 812 // dispatch simplifies control-flow for func value 813 func (jr *jsonReader) dispatch(w *bufio.Writer, b byte) error { 814 switch b { 815 case 'f': 816 return jr.keyword(w, []byte{'f', 'a', 'l', 's', 'e'}) 817 case 'n': 818 return jr.keyword(w, []byte{'n', 'u', 'l', 'l'}) 819 case 't': 820 return jr.keyword(w, []byte{'t', 'r', 'u', 'e'}) 821 case '.': 822 return jr.dot(w) 823 case '+': 824 return jr.positive(w) 825 case '-': 826 return jr.negative(w) 827 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 828 return jr.number(w) 829 case '\'', '"': 830 return jr.stringValue(w) 831 case '[': 832 return jr.array(w) 833 case '{': 834 return jr.object(w) 835 default: 836 return jr.improveError(errInvalidToken) 837 } 838 } File: tu/json0_test.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 "strings" 8 "testing" 9 ) 10 11 func TestJSON0(t *testing.T) { 12 var tests = []struct { 13 Input string 14 Expected string 15 }{ 16 {`false`, `false`}, 17 {`null`, `null`}, 18 {` true `, `true`}, 19 20 {`0`, `0`}, 21 {`1`, `1`}, 22 {`2`, `2`}, 23 {`3`, `3`}, 24 {`4`, `4`}, 25 {`5`, `5`}, 26 {`6`, `6`}, 27 {`7`, `7`}, 28 {`8`, `8`}, 29 {`9`, `9`}, 30 31 {` .345`, `0.345`}, 32 {` -.345`, `-0.345`}, 33 {` +.345`, `0.345`}, 34 {` +123.345`, `123.345`}, 35 {` +.345`, `0.345`}, 36 {` 123.34523`, `123.34523`}, 37 {` 123.34_523`, `123.34523`}, 38 {` 123_456.123`, `123456.123`}, 39 40 {`""`, `""`}, 41 {`''`, `""`}, 42 {`"\""`, `"\""`}, 43 {`'\"'`, `"\""`}, 44 {`'\''`, `"'"`}, 45 {`'abc\u0e9A'`, `"abc\u0E9A"`}, 46 {`'abc\x1f[0m'`, `"abc\u001F[0m"`}, 47 48 {`[ ]`, `[]`}, 49 {`[ , ]`, `[]`}, 50 {`[.345, false,null , ]`, `[0.345,false,null]`}, 51 52 {`{ }`, `{}`}, 53 {`{ , }`, `{}`}, 54 55 { 56 `{ 'abc': .345, "def" : false, 'xyz':null , }`, 57 `{"abc":0.345,"def":false,"xyz":null}`, 58 }, 59 60 {`{0problems:123,}`, `{"0problems":123}`}, 61 {`{0_problems:123}`, `{"0_problems":123}`}, 62 } 63 64 for _, tc := range tests { 65 t.Run(tc.Input, func(t *testing.T) { 66 var out strings.Builder 67 w := bufio.NewWriter(&out) 68 r := bufio.NewReader(strings.NewReader(tc.Input)) 69 if err := json0(w, r); err != nil && err != io.EOF { 70 t.Fatal(err) 71 return 72 } 73 // don't forget to flush the buffer, or output will be empty 74 w.Flush() 75 76 s := out.String() 77 s = strings.TrimSuffix(s, "\n") 78 if s != tc.Expected { 79 t.Fatalf("<got>\n%s\n<expected>\n%s", s, tc.Expected) 80 return 81 } 82 }) 83 } 84 } 85 86 func TestEscapedStringBytes(t *testing.T) { 87 var escaped = map[rune][]byte{ 88 '\x00': {'\\', 'u', '0', '0', '0', '0'}, 89 '\x01': {'\\', 'u', '0', '0', '0', '1'}, 90 '\x02': {'\\', 'u', '0', '0', '0', '2'}, 91 '\x03': {'\\', 'u', '0', '0', '0', '3'}, 92 '\x04': {'\\', 'u', '0', '0', '0', '4'}, 93 '\x05': {'\\', 'u', '0', '0', '0', '5'}, 94 '\x06': {'\\', 'u', '0', '0', '0', '6'}, 95 '\x07': {'\\', 'u', '0', '0', '0', '7'}, 96 '\x0b': {'\\', 'u', '0', '0', '0', 'b'}, 97 '\x0e': {'\\', 'u', '0', '0', '0', 'e'}, 98 '\x0f': {'\\', 'u', '0', '0', '0', 'f'}, 99 '\x10': {'\\', 'u', '0', '0', '1', '0'}, 100 '\x11': {'\\', 'u', '0', '0', '1', '1'}, 101 '\x12': {'\\', 'u', '0', '0', '1', '2'}, 102 '\x13': {'\\', 'u', '0', '0', '1', '3'}, 103 '\x14': {'\\', 'u', '0', '0', '1', '4'}, 104 '\x15': {'\\', 'u', '0', '0', '1', '5'}, 105 '\x16': {'\\', 'u', '0', '0', '1', '6'}, 106 '\x17': {'\\', 'u', '0', '0', '1', '7'}, 107 '\x18': {'\\', 'u', '0', '0', '1', '8'}, 108 '\x19': {'\\', 'u', '0', '0', '1', '9'}, 109 '\x1a': {'\\', 'u', '0', '0', '1', 'a'}, 110 '\x1b': {'\\', 'u', '0', '0', '1', 'b'}, 111 '\x1c': {'\\', 'u', '0', '0', '1', 'c'}, 112 '\x1d': {'\\', 'u', '0', '0', '1', 'd'}, 113 '\x1e': {'\\', 'u', '0', '0', '1', 'e'}, 114 '\x1f': {'\\', 'u', '0', '0', '1', 'f'}, 115 116 '\t': {'\\', 't'}, 117 '\f': {'\\', 'f'}, 118 '\b': {'\\', 'b'}, 119 '\r': {'\\', 'r'}, 120 '\n': {'\\', 'n'}, 121 '\\': {'\\', '\\'}, 122 '"': {'\\', '"'}, 123 } 124 125 if n := len(escapedStringBytes); n != 256 { 126 t.Fatalf(`expected 256 entries, instead of %d`, n) 127 return 128 } 129 130 for i, v := range escapedStringBytes { 131 exp := []byte{byte(i)} 132 if esc, ok := escaped[rune(i)]; ok { 133 exp = esc 134 } 135 136 if !bytes.Equal(v, exp) { 137 t.Fatalf("%d: expected %#v, got %#v", i, exp, v) 138 return 139 } 140 } 141 } File: tu/lines.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "crypto/md5" 7 "crypto/sha1" 8 "crypto/sha256" 9 "crypto/sha512" 10 "encoding/base64" 11 "encoding/hex" 12 "errors" 13 "fmt" 14 "hash" 15 "io" 16 "math" 17 "mime" 18 "os" 19 "path/filepath" 20 "regexp" 21 "sort" 22 "strconv" 23 "strings" 24 "unicode" 25 "unicode/utf8" 26 ) 27 28 func begin(w *bufio.Writer, r io.Reader, args []string) error { 29 for _, s := range args { 30 w.WriteString(s) 31 if err := endLine(w); err != nil { 32 return err 33 } 34 } 35 36 return copyLines(w, r) 37 } 38 39 func blowTabs(w *bufio.Writer, r io.Reader, n int) error { 40 return loopLines(r, func(line []byte) error { 41 blowTabsLine(w, line, n) 42 return endLine(w) 43 }) 44 } 45 46 func breathe(w *bufio.Writer, r io.Reader, n int) error { 47 if n < 1 { 48 return loopLines(r, func(line []byte) error { 49 w.Write(line) 50 return endLine(w) 51 }) 52 } 53 54 i := 0 55 56 return loopLines(r, func(line []byte) error { 57 if i%n == 0 && i > 0 { 58 if err := endLine(w); err != nil { 59 return err 60 } 61 } 62 63 i++ 64 w.Write(line) 65 return endLine(w) 66 }) 67 } 68 69 func common(w *bufio.Writer, r io.Reader, args []string) error { 70 src1, src2, err := loadSetPair(args, r) 71 if err != nil { 72 return err 73 } 74 75 got := make(map[string]struct{}, len(src1)) 76 for _, s := range src1 { 77 got[s] = struct{}{} 78 } 79 80 for _, s := range src2 { 81 if _, ok := got[s]; !ok { 82 continue 83 } 84 85 w.WriteString(s) 86 if err := endLine(w); err != nil { 87 return err 88 } 89 } 90 91 return nil 92 } 93 94 func crlf(w *bufio.Writer, line []byte) error { 95 w.Write(line) 96 w.WriteByte('\r') 97 return endLine(w) 98 } 99 100 func dataURI(w *bufio.Writer, r io.Reader, args []string) error { 101 for _, path := range args { 102 if path == `-` { 103 return errors.New(`standard input (-) not supported`) 104 } 105 } 106 107 for _, path := range args { 108 err := handleNamedInput(path, r, func(r io.Reader) error { 109 kind := mime.TypeByExtension(filepath.Ext(path)) 110 if len(kind) == 0 { 111 return errors.New(path + `: can't guess a MIME type`) 112 } 113 114 w.WriteString(`data:`) 115 w.WriteString(kind) 116 w.WriteString(`;base64,`) 117 return toBase64(w, r) 118 }) 119 120 if err != nil { 121 return err 122 } 123 } 124 125 return nil 126 } 127 128 func dedup(w *bufio.Writer, r io.Reader) error { 129 got := make(map[string]struct{}) 130 131 return loopLines(r, func(line []byte) error { 132 s := string(line) 133 if _, ok := got[s]; ok { 134 return nil 135 } 136 137 got[s] = struct{}{} 138 w.Write(line) 139 return endLine(w) 140 }) 141 } 142 143 func dropEnd(w *bufio.Writer, r io.Reader, suffixes []string) error { 144 return loopLinesString(r, func(line string) error { 145 for _, s := range suffixes { 146 line = strings.TrimSuffix(line, s) 147 } 148 w.WriteString(line) 149 return endLine(w) 150 }) 151 } 152 153 func dropPunctuation(w *bufio.Writer, r io.Reader) error { 154 return loopLines(r, func(line []byte) error { 155 dropPunctuationLine(w, line) 156 return endLine(w) 157 }) 158 } 159 160 func dropStart(w *bufio.Writer, r io.Reader, prefixes []string) error { 161 return loopLinesString(r, func(line string) error { 162 for _, s := range prefixes { 163 line = strings.TrimPrefix(line, s) 164 } 165 w.WriteString(line) 166 return endLine(w) 167 }) 168 } 169 170 func dropTabs(w *bufio.Writer, r io.Reader) error { 171 return loopLines(r, func(line []byte) error { 172 dropTabsLine(w, line) 173 return endLine(w) 174 }) 175 } 176 177 // chopLF ignores the last byte, if it's a line-feed 178 func chopLF(w *bufio.Writer, r io.Reader) error { 179 i := 0 180 return loopLines(r, func(line []byte) error { 181 if i > 0 { 182 if err := endLine(w); err != nil { 183 return err 184 } 185 } 186 187 i++ 188 w.Write(line) 189 return nil 190 }) 191 } 192 193 func end(w *bufio.Writer, r io.Reader, args []string) error { 194 if err := copyLines(w, r); err != nil { 195 return err 196 } 197 return writeLines(w, args) 198 } 199 200 func first(w *bufio.Writer, r io.Reader, n int) error { 201 if n < 1 { 202 return nil 203 } 204 205 return loopLines(r, func(line []byte) error { 206 if n < 1 { 207 return errNoMoreOutput 208 } 209 210 n-- 211 w.Write(line) 212 return endLine(w) 213 }) 214 } 215 216 // func drop(w *bufio.Writer, r io.Reader, args []string) error { 217 // return loopLinesString(r, func(line string) error { 218 // for _, what := range args { 219 // line = strings.ReplaceAll(line, what, ``) 220 // } 221 // w.WriteString(line) 222 // return endLine(w) 223 // }) 224 // } 225 226 func drop(w *bufio.Writer, r io.Reader, args []string) error { 227 if len(args) == 0 { 228 return copyLines(w, r) 229 } 230 231 if len(args) == 1 { 232 return dropString(w, r, []byte(args[0])) 233 } 234 235 var bb1, bb2 bytes.Buffer 236 avoid := make([][]byte, 0, len(args)) 237 for _, s := range args { 238 avoid = append(avoid, []byte(s)) 239 } 240 241 return loopLines(r, func(line []byte) error { 242 src := &bb1 243 dest := &bb2 244 src.Reset() 245 src.Write(line) 246 247 for _, what := range avoid { 248 s := src.Bytes() 249 dest.Reset() 250 251 for len(s) > 0 { 252 i := bytes.Index(s, what) 253 if i < 0 { 254 dest.Write(s) 255 break 256 } 257 258 dest.Write(s[:i]) 259 s = s[i+len(what):] 260 } 261 262 src, dest = dest, src 263 } 264 265 // any loop results in a buffer-swap, so the final result always 266 // ends in the `src` buffer, confusingly 267 w.Write(src.Bytes()) 268 return endLine(w) 269 }) 270 } 271 272 // dropString handles the 1-argument case for func drop more efficiently, 273 // by avoiding copying data into swappable byte-buffer pairs 274 func dropString(w *bufio.Writer, r io.Reader, what []byte) error { 275 return loopLines(r, func(line []byte) error { 276 for len(line) > 0 { 277 i := bytes.Index(line, what) 278 if i < 0 { 279 w.Write(line) 280 break 281 } 282 283 w.Write(line[:i]) 284 line = line[i+len(what):] 285 } 286 287 return endLine(w) 288 }) 289 } 290 291 func glue(w *bufio.Writer, r io.Reader, args []string) error { 292 sep, err := optionalStringArg(args, ``) 293 if err != nil { 294 return err 295 } 296 297 i := 0 298 err = loopLines(r, func(line []byte) error { 299 if i > 0 { 300 w.WriteString(sep) 301 } 302 i++ 303 w.Write(line) 304 return nil 305 }) 306 307 if err != nil { 308 return err 309 } 310 311 if i > 0 { 312 return endLine(w) 313 } 314 return nil 315 } 316 317 func indent(w *bufio.Writer, r io.Reader, n int) error { 318 return loopLines(r, func(line []byte) error { 319 writeSpaces(w, n) 320 w.Write(line) 321 return endLine(w) 322 }) 323 } 324 325 func join(w *bufio.Writer, r io.Reader, args []string) error { 326 if len(args) > 1 { 327 return fmt.Errorf(`multiple arguments not supported`) 328 } 329 330 sep := "\t" 331 if len(args) == 1 { 332 sep = args[0] 333 } 334 335 i := 0 336 err := loopLines(r, func(line []byte) error { 337 if i > 0 { 338 _, err := w.WriteString(sep) 339 if err := adaptWriteError(err); err != nil { 340 return err 341 } 342 } 343 i++ 344 345 w.Write(line) 346 return nil 347 }) 348 349 if err != nil { 350 return err 351 } 352 353 if i > 0 { 354 return endLine(w) 355 } 356 return nil 357 } 358 359 func last(w *bufio.Writer, r io.Reader, n int) error { 360 if n < 1 { 361 return nil 362 } 363 364 if rs, ok := r.(io.ReadSeeker); ok && rs != os.Stdin { 365 if err := seekLastLinesApprox(rs, n); err != nil { 366 return err 367 } 368 } 369 370 if n == 1 { 371 return lastLine(w, r) 372 } 373 374 latest := newStringRing(n) 375 err := loopLines(r, func(line []byte) error { 376 latest.Bring(string(line)) 377 return nil 378 }) 379 380 if err != nil { 381 return err 382 } 383 384 first, second := latest.Halves() 385 if err := writeLines(w, first); err != nil { 386 return err 387 } 388 return writeLines(w, second) 389 } 390 391 // lastLine handles the special 1-line case for func last more efficiently, 392 // as it doesn't involve copying things into ring-buffers 393 func lastLine(w *bufio.Writer, r io.Reader) error { 394 var last []byte 395 err := loopLines(r, func(line []byte) error { 396 last = line 397 return nil 398 }) 399 400 if err != nil { 401 return err 402 } 403 404 w.Write(last) 405 return endLine(w) 406 } 407 408 // seekLastLinesApprox positions a reader `approximately` where the last n 409 // lines start; the position `sought` is never after the start of the first 410 // of those trailing lines 411 func seekLastLinesApprox(rs io.ReadSeeker, n int) error { 412 if n < 1 { 413 return nil 414 } 415 416 first := true 417 var buf [bufferSize]byte 418 419 return loopChunksBackward(rs, buf[:], func(pos int, chunk []byte) (keepGoing bool) { 420 // handle trailing line-feed by effectively ignoring it 421 if first { 422 first = false 423 if len(chunk) > 0 && chunk[len(chunk)-1] == '\n' { 424 n++ 425 } 426 } 427 428 c := bytes.Count(chunk, []byte{'\n'}) 429 if n >= c { 430 n -= c 431 return true 432 } 433 434 if n < 1 { 435 // first of the trailing lines may start in a previous chunk 436 return bytes.IndexByte(chunk, '\n') < 0 437 } 438 // search ended in this chunk 439 return false 440 }) 441 } 442 443 func lines(w *bufio.Writer, r io.Reader, args []string) error { 444 return handleNamedInputs(args, r, func(path string, r io.Reader) error { 445 return copyLines(w, r) 446 }) 447 } 448 449 func lineUp(w *bufio.Writer, r io.Reader, perLine int) error { 450 if perLine < 1 { 451 return lineAllUp(w, r) 452 } 453 454 i := 0 455 err := loopLines(r, func(line []byte) error { 456 if i > 0 { 457 if i%perLine != 0 { 458 w.WriteByte('\t') 459 } else { 460 if err := endLine(w); err != nil { 461 return err 462 } 463 } 464 } 465 i++ 466 467 w.Write(line) 468 return nil 469 }) 470 471 if err != nil { 472 return err 473 } 474 475 if i > 0 { 476 return endLine(w) 477 } 478 return nil 479 } 480 481 func lineAllUp(w *bufio.Writer, r io.Reader) error { 482 i := 0 483 err := loopLines(r, func(line []byte) error { 484 if i > 0 { 485 w.WriteByte('\t') 486 } 487 i++ 488 w.Write(line) 489 return nil 490 }) 491 492 if err != nil { 493 return err 494 } 495 496 if i > 0 { 497 return endLine(w) 498 } 499 return nil 500 } 501 502 func links(w *bufio.Writer, r io.Reader) error { 503 return loopLines(r, func(line []byte) error { 504 var err error 505 loopLinks(line, func(i int, s []byte) (keepGoing bool) { 506 w.Write(s) 507 err = endLine(w) 508 return err == nil 509 }) 510 return err 511 }) 512 } 513 514 func lower(w *bufio.Writer, line []byte) error { 515 for len(line) > 0 { 516 r, size := utf8.DecodeRune(line) 517 r = unicode.ToLower(r) 518 w.WriteRune(r) 519 line = line[size:] 520 } 521 return endLine(w) 522 } 523 524 func matchParagraphsFold(w *bufio.Writer, r io.Reader, args []string) error { 525 // no expressions means match all paragraphs 526 if len(args) == 0 { 527 return stomp(w, r) 528 } 529 530 // turn arguments into case-insensitive regexes 531 var match []*regexp.Regexp 532 for _, s := range args { 533 if !strings.HasPrefix(s, `(?i)`) { 534 s = `(?i)` + s 535 } 536 537 m, err := regexp.Compile(s) 538 if err != nil { 539 return err 540 } 541 match = append(match, m) 542 } 543 544 shown := 0 545 didMatch := false 546 var par bytes.Buffer 547 548 err := loopLines(r, func(line []byte) error { 549 if len(bytes.TrimSpace(line)) == 0 { 550 if !didMatch { 551 par.Reset() 552 return nil 553 } 554 555 if par.Len() == 0 { 556 didMatch = false 557 return nil 558 } 559 560 if shown > 0 { 561 w.WriteByte('\n') 562 } 563 _, err := w.Write(par.Bytes()) 564 565 shown++ 566 par.Reset() 567 didMatch = false 568 return adaptWriteError(err) 569 } 570 571 // remember all paragraph lines, even if no match has happened for 572 // it yet, since a match can happen on its last line, for example 573 par.Write(line) 574 par.WriteByte('\n') 575 576 // no need to test later lines from already-matched paragraphs 577 if didMatch { 578 return nil 579 } 580 581 for _, m := range match { 582 if m.Match(line) { 583 didMatch = true 584 return nil 585 } 586 } 587 return nil 588 }) 589 590 if err != nil { 591 return err 592 } 593 594 // don't forget to show the last matched paragraph 595 if !didMatch || par.Len() == 0 { 596 return nil 597 } 598 599 if shown > 0 { 600 w.WriteByte('\n') 601 } 602 _, err = w.Write(par.Bytes()) 603 return adaptWriteError(err) 604 } 605 606 func md5Hex(w *bufio.Writer, r io.Reader) error { 607 return hashHex(w, r, md5.New()) 608 } 609 610 func missing(w *bufio.Writer, r io.Reader, args []string) error { 611 src1, src2, err := loadSetPair(args, r) 612 if err != nil { 613 return err 614 } 615 616 got := make(map[string]struct{}, len(src1)) 617 for _, s := range src1 { 618 got[s] = struct{}{} 619 } 620 621 for _, s := range src2 { 622 if _, ok := got[s]; ok { 623 continue 624 } 625 626 w.WriteString(s) 627 if err := endLine(w); err != nil { 628 return err 629 } 630 } 631 632 return nil 633 } 634 635 func numbers(w *bufio.Writer, r io.Reader) error { 636 return loopLines(r, func(line []byte) error { 637 var err error 638 loopNumbers(line, func(i int, s []byte) (keepGoing bool) { 639 w.Write(s) 640 err = endLine(w) 641 return err == nil 642 }) 643 return err 644 }) 645 } 646 647 func null(w *bufio.Writer, r io.Reader) error { 648 return nil 649 } 650 651 func numberLines(w *bufio.Writer, r io.Reader, n int) error { 652 return loopLines(r, func(line []byte) error { 653 n++ 654 writeInt64(w, int64(n-1)) 655 w.WriteByte('\t') 656 w.Write(line) 657 return endLine(w) 658 }) 659 } 660 661 func primes(w *bufio.Writer, r io.Reader, count int) error { 662 // 2 is the only even prime, and the smallest one 663 if count > 0 { 664 w.WriteString(`2`) 665 if err := endLine(w); err != nil { 666 return err 667 } 668 count-- 669 } 670 671 for n := 3; count > 0; n += 2 { 672 if isOddDiv(n) { 673 continue 674 } 675 676 var buf [32]byte 677 w.Write(strconv.AppendInt(buf[:0], int64(n), 10)) 678 if err := endLine(w); err != nil { 679 return err 680 } 681 count-- 682 } 683 684 return nil 685 } 686 687 // isOddDiv helps func primes do its job efficiently 688 func isOddDiv(n int) bool { 689 max := int(math.Sqrt(float64(n))) 690 691 for div := 3; div <= max; div += 2 { 692 if n%div == 0 { 693 return true 694 } 695 } 696 697 return false 698 } 699 700 func reflow(w *bufio.Writer, r io.Reader, max int) error { 701 return loopLines(r, func(line []byte) error { 702 reflowLine(w, line, max) 703 return endLine(w) 704 }) 705 } 706 707 func runes(w *bufio.Writer, r io.Reader) error { 708 return loopLines(r, func(line []byte) error { 709 for len(line) > 0 { 710 r, size := utf8.DecodeRune(line) 711 line = line[size:] 712 713 w.WriteRune(r) 714 if err := endLine(w); err != nil { 715 return err 716 } 717 } 718 719 return nil 720 }) 721 } 722 723 func sha1Hex(w *bufio.Writer, r io.Reader) error { 724 return hashHex(w, r, sha1.New()) 725 } 726 727 func sha256Hex(w *bufio.Writer, r io.Reader) error { 728 return hashHex(w, r, sha256.New()) 729 } 730 731 func sha512Hex(w *bufio.Writer, r io.Reader) error { 732 return hashHex(w, r, sha512.New()) 733 } 734 735 func skip(w *bufio.Writer, r io.Reader, n int) error { 736 return loopLines(r, func(line []byte) error { 737 if n > 0 { 738 n-- 739 return nil 740 } 741 742 w.Write(line) 743 return endLine(w) 744 }) 745 } 746 747 func skipEmpty(w *bufio.Writer, line []byte) error { 748 if len(line) == 0 { 749 return nil 750 } 751 752 w.Write(line) 753 return endLine(w) 754 } 755 756 func skipLast(w *bufio.Writer, r io.Reader, n int) error { 757 if n < 1 { 758 return copyLines(w, r) 759 } 760 761 latest := newStringRing(n) 762 return loopLines(r, func(line []byte) error { 763 if latest.Len() < n { 764 latest.Bring(string(line)) 765 return nil 766 } 767 768 w.WriteString(latest.Earliest()) 769 latest.Bring(string(line)) 770 return endLine(w) 771 }) 772 } 773 774 func squeeze(w *bufio.Writer, line []byte) error { 775 line = trimSpaces(line) 776 777 for len(line) > 0 { 778 i := bytes.IndexByte(line, '\t') 779 if i < 0 { 780 squeezeChunk(w, line) 781 break 782 } 783 784 squeezeChunk(w, line[:i]) 785 w.WriteByte('\t') 786 line = line[i+1:] 787 } 788 789 return endLine(w) 790 } 791 792 func stomp(w *bufio.Writer, r io.Reader) error { 793 empty := 0 794 nonEmpty := 0 795 796 return loopLines(r, func(line []byte) error { 797 if len(line) == 0 { 798 empty++ 799 return nil 800 } 801 802 if empty > 0 && nonEmpty > 0 { 803 if err := endLine(w); err != nil { 804 return err 805 } 806 } 807 808 empty = 0 809 nonEmpty++ 810 w.Write(line) 811 return endLine(w) 812 }) 813 } 814 815 func tally(w *bufio.Writer, r io.Reader) error { 816 tally := make(map[string]int) 817 818 err := loopLines(r, func(line []byte) error { 819 tally[string(line)]++ 820 return nil 821 }) 822 823 if err != nil { 824 return err 825 } 826 827 sorted := make([]string, 0, len(tally)) 828 for k := range tally { 829 sorted = append(sorted, k) 830 } 831 832 // reverse-sort keys by their tally-count 833 sort.SliceStable(sorted, func(i, j int) bool { 834 return tally[sorted[i]] > tally[sorted[j]] 835 }) 836 837 for _, k := range sorted { 838 writeInt64(w, int64(tally[k])) 839 w.WriteByte('\t') 840 w.WriteString(k) 841 if err := endLine(w); err != nil { 842 return err 843 } 844 } 845 return nil 846 } 847 848 func uriEncode(w *bufio.Writer, line []byte) error { 849 // s := url.PathEscape(string(line)) 850 // w.WriteString(s) 851 // return endLine(w) 852 853 for len(line) > 0 { 854 r, size := utf8.DecodeRune(line) 855 line = line[size:] 856 857 if r < 128 && uriUnescapedASCII[r] { 858 w.WriteByte(byte(r)) 859 continue 860 } 861 862 const hex = `0123456789ABCDEF` 863 const l = byte(len(hex)) 864 w.WriteByte('%') 865 w.WriteByte(hex[byte(r)/l]) 866 w.WriteByte(hex[byte(r)%l]) 867 } 868 869 return endLine(w) 870 } 871 872 // toBase64 is named to avoid clashing with package `base64` 873 func toBase64(w *bufio.Writer, r io.Reader) error { 874 enc := base64.NewEncoder(base64.StdEncoding, w) 875 if err := copyBytes(enc, r); err != nil { 876 return err 877 } 878 879 // can't defer-call this, as it must happen before ending the line 880 enc.Close() 881 return endLine(w) 882 } 883 884 // toHex is named to avoid clashing with package `hex` 885 func toHex(w *bufio.Writer, r io.Reader) error { 886 enc := hex.NewEncoder(w) 887 if err := copyBytes(enc, r); err != nil { 888 return err 889 } 890 return endLine(w) 891 } 892 893 // hashHex hash-encodes data as a hex-ASCII line 894 func hashHex(w *bufio.Writer, r io.Reader, h hash.Hash) error { 895 err := hashBytes(hex.NewEncoder(w), r, h) 896 if err != nil { 897 return err 898 } 899 return endLine(w) 900 } File: tu/lines_test.go 1 package main 2 3 import ( 4 "fmt" 5 "strconv" 6 "strings" 7 "testing" 8 ) 9 10 func TestPureLineTools(t *testing.T) { 11 var tests = []struct { 12 Tool string 13 Input string 14 Expected string 15 }{ 16 {`squeeze`, ``, ``}, 17 {`squeeze`, ` `, "\n"}, 18 {`squeeze`, `abc def`, "abc def\n"}, 19 {`squeeze`, "abc \t def", "abc\tdef\n"}, 20 {`trim`, ``, ``}, 21 {`trim`, ` `, "\n"}, 22 {`trim`, `abc def`, "abc def\n"}, 23 {`trim`, "abc \t def", "abc \t def\n"}, 24 } 25 26 for i, tc := range tests { 27 t.Run(strconv.Itoa(i), func(t *testing.T) { 28 var sb strings.Builder 29 30 err := run(&sb, strings.NewReader(tc.Input), tc.Tool, nil) 31 if err != nil { 32 t.Fatal(err) 33 return 34 } 35 36 got := sb.String() 37 if got != tc.Expected { 38 t.Fatalf("got\n%q\ninstead of\n%q", got, tc.Expected) 39 return 40 } 41 }) 42 } 43 } 44 45 func TestLineIntTools(t *testing.T) { 46 var tests = []struct { 47 Tool string 48 Input string 49 Arg int 50 Expected string 51 }{ 52 {`blow`, ``, 3, ``}, 53 {`blow`, ``, 0, ``}, 54 {`blow`, ``, -3, ``}, 55 {`blow`, `abc def`, 3, "abc def\n"}, 56 {`blow`, "abc\tdef", 3, "abc def\n"}, 57 {`blow`, "abc\tdef", 4, "abc def\n"}, 58 {`blow`, "abc\tdef", 0, "abcdef\n"}, 59 {`blow`, "abc\tdef", -1, "abcdef\n"}, 60 61 {`indent`, ``, 3, ``}, 62 {`indent`, "abc\ndef", 3, " abc\n def\n"}, 63 {`indent`, "abc\ndef", 0, "abc\ndef\n"}, 64 {`indent`, "abc\ndef", -10, "abc\ndef\n"}, 65 66 {`dedent`, " abc\n def\n", 3, "abc\ndef\n"}, 67 {`dedent`, " abc\n def\n", 1, " abc\n def\n"}, 68 {`dedent`, " abc\n def\n", 0, " abc\n def\n"}, 69 {`dedent`, " abc\n def\n", -5, " abc\n def\n"}, 70 71 {`last`, "abc\ndef\n", -5, ``}, 72 {`last`, "abc\ndef\n", 0, ``}, 73 {`last`, "abc\ndef\n123\n456\n", 3, "def\n123\n456\n"}, 74 {`last`, "abc\ndef\n123\n456\n", 1, "456\n"}, 75 {`last`, "abc\ndef\n123\n456\n", 2, "123\n456\n"}, 76 {`last`, "abc\ndef\n123\n456\n", 200, "abc\ndef\n123\n456\n"}, 77 {`last`, "abc\ndef\n", -5, ``}, 78 79 {`skip-last`, "abc\ndef\n", 0, "abc\ndef\n"}, 80 {`skip-last`, "abc\ndef\n123\n456\n", -20, "abc\ndef\n123\n456\n"}, 81 {`skip-last`, "abc\ndef\n123\n456\n", 4, ``}, 82 {`skip-last`, "abc\ndef\n123\n456\n", 3, "abc\n"}, 83 {`skip-last`, "abc\ndef\n123\n456\n", 1, "abc\ndef\n123\n"}, 84 {`skip-last`, "abc\ndef\n123\n456\n", 2, "abc\ndef\n"}, 85 {`skip-last`, "abc\ndef\n123\n456\n", 200, ``}, 86 } 87 88 for _, tc := range tests { 89 name := fmt.Sprintf(`%s(%d): %q`, tc.Tool, tc.Arg, tc.Input) 90 91 t.Run(name, func(t *testing.T) { 92 var out strings.Builder 93 args := []string{strconv.Itoa(tc.Arg)} 94 95 err := run(&out, strings.NewReader(tc.Input), tc.Tool, args) 96 if err != nil { 97 t.Fatal(err) 98 return 99 } 100 101 got := out.String() 102 if got != tc.Expected { 103 t.Fatalf("got\n%q\ninstead of\n%q", got, tc.Expected) 104 return 105 } 106 }) 107 } 108 } File: tu/main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "os" 7 "sort" 8 9 _ "embed" 10 ) 11 12 //go:embed examples.sh 13 var examples string 14 15 //go:embed info.txt 16 var info string 17 18 //go:embed tldr.sh 19 var tldr string 20 21 //go:embed tools.txt 22 var tools string 23 24 // errGeneric's message isn't meant to show, opting to quit the app with a 25 // generic error code instead; it's used when other errors were shown before 26 var errGeneric = errors.New(`failed`) 27 28 func main() { 29 addMetaTools() 30 31 if len(os.Args) < 2 { 32 run(os.Stderr, os.Stdin, `help`, nil) 33 showError(errors.New(`no tool name given`)) 34 os.Exit(1) 35 } 36 37 switch os.Args[1] { 38 case `-h`, `--h`, `-help`, `--help`: 39 run(os.Stdout, os.Stdin, `help`, nil) 40 return 41 } 42 43 err := run(os.Stdout, os.Stdin, os.Args[1], os.Args[2:]) 44 if errors.Is(err, errGeneric) { 45 os.Exit(1) 46 } 47 if err != nil { 48 showError(err) 49 os.Exit(1) 50 } 51 } 52 53 func aliases(w *bufio.Writer) error { 54 aliases := make(map[string][]string) 55 for k, v := range toolNameAliases { 56 aliases[v] = append(aliases[v], k) 57 } 58 59 keys := make([]string, 0, len(aliases)) 60 for k := range aliases { 61 keys = append(keys, k) 62 sort.Strings(aliases[k]) 63 } 64 sort.Strings(keys) 65 66 for _, k := range keys { 67 for i, v := range aliases[k] { 68 if i > 0 { 69 w.WriteByte('\t') 70 } 71 w.WriteString(v) 72 } 73 74 if err := endLine(w); err != nil { 75 return err 76 } 77 } 78 79 return nil 80 } 81 82 func showExamples(w *bufio.Writer) error { 83 w.WriteString(examples) 84 return nil 85 } 86 87 func showHelp(w *bufio.Writer) error { 88 w.WriteString(info) 89 w.WriteString("\n\nTools Available\n\n\n") 90 w.WriteString(tools) 91 return nil 92 } 93 94 func showTLDR(w *bufio.Writer) error { 95 w.WriteString(tldr) 96 return nil 97 } 98 99 // showError standardizes how errors from this app look 100 func showError(err error) { 101 if err == nil { 102 return 103 } 104 105 os.Stderr.WriteString("\x1b[31m") 106 os.Stderr.WriteString(err.Error()) 107 os.Stderr.WriteString("\x1b[0m\n") 108 } File: tu/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2024 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: tu/other.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "os/exec" 8 "path/filepath" 9 "runtime" 10 "strings" 11 "time" 12 ) 13 14 // open tries to open/show/start all files/folder/URIs given, even other apps 15 func openTool(w *bufio.Writer, r io.Reader, names []string) error { 16 if len(names) == 0 || (len(names) == 1 && names[0] == `-`) { 17 return errors.New(`expected files/folders/URIs`) 18 } 19 20 for _, s := range names { 21 if err := popupName(s); err != nil { 22 return err 23 } 24 } 25 return nil 26 } 27 28 // popupName is used by func open to flatten its control-flow 29 func popupName(s string) error { 30 if f := strings.HasPrefix; f(s, `https://`) || f(s, `http://`) { 31 return popup(s) 32 } 33 34 if strings.HasPrefix(s, `:`) { 35 _, err := parsePortNumber(s[1:]) 36 if err == nil { 37 return popup(`http://127.0.0.1` + s) 38 } 39 } 40 41 s, err := filepath.Abs(s) 42 if err != nil { 43 return err 44 } 45 return popup(s) 46 } 47 48 // popup tries to open the file/folder/URI given using the system default 49 // handlers for these 50 func popup(what string) error { 51 switch runtime.GOOS { 52 case `windows`: 53 const how = `url.dll,FileProtocolHandler` 54 return exec.Command(`rundll32`, how, what).Run() 55 case `darwin`: 56 return exec.Command(`open`, what).Run() 57 default: 58 return exec.Command(`xdg-open`, what).Run() 59 } 60 } 61 62 func today(w *bufio.Writer) error { 63 var buf [32]byte 64 const fmt = `2006-01-02 Mon Jan 02` 65 w.Write(time.Now().AppendFormat(buf[:0], fmt)) 66 return endLine(w) 67 } 68 69 func now(w *bufio.Writer) error { 70 var buf [32]byte 71 const fmt = `2006-01-02 15:04:05 Jan Mon` 72 w.Write(time.Now().AppendFormat(buf[:0], fmt)) 73 return endLine(w) 74 } 75 76 func ymd(w *bufio.Writer) error { 77 var buf [16]byte 78 w.Write(time.Now().AppendFormat(buf[:0], `2006-01-02`)) 79 return endLine(w) 80 } File: tu/overview.txt 1 # General 2 3 This command-line app brings many tools, tiny and/or useful, into one. This 4 approach consolidates what would otherwise be a smattering of files, which 5 is always nice, and even saves file-space, as Go apps each carry a file-size 6 overhead of megabytes. 7 8 The first argument given is looked-up as a tool name, or one of their many 9 aliases. Tools are implemented as simple funcs, with their specific types 10 determining how exactly they're called. 11 12 Tool names/aliases are always lower-cased, and any dashes/underscores given 13 in them are ignored, except for `--` and `-`, which are hard-coded aliases 14 for the `compose` tool. 15 16 17 # io.go 18 19 Source file io.go standardizes how this app's tools read input, and how they 20 emit output, via various funcs and types. 21 22 23 # main.go 24 25 Source file main.go defines the app's starting point, and a few help-related 26 tools/variables. 27 28 29 # running.go 30 31 Source file running.go has all tool-lookup tables, as well as the tool-name 32 aliases. That file also defines higher-order tools, which run/call other 33 tools, the main examples being the `compose` and `each` tools. 34 35 Func `run` does the lookup, delegating the actual running to `dispatchFunc`, 36 the latter choosing its exact behavior based on the type of the looked-up 37 func: some tool funcs are called for each input line, while other funcs are 38 simply given an io.Reader for them to handle, for example. 39 40 41 # strings.go 42 43 Source file strings.go defines various string-related funcs for tools to use, 44 along with a few string-related types, such as the custom ring-buffer type 45 named `stringRing`. 46 47 48 # Other Source Files 49 50 bytes.go byte-slice-oriented tools, whether binary or line-based 51 coby.go `COunt BYtes` 52 examples.sh shell-runnable examples shown by tool `examples` 53 fractions.go `fractions`, an arbitrary-precision rational calculator 54 id3pic.go `id3-pic`, a thumbnail-picture extraction tool 55 info.txt part of the help message 56 json0.go `json-0` and `jsonl`, 2 useful JSON fixers/squeezers 57 lines.go string-oriented tools, mostly line-based 58 other.go argument-oriented tools which don't read input 59 plain.go `plain` 60 sbs.go `Side By Side` 61 symbols.go `symbols`, along with the long lookup-tables it uses 62 tables.go tools about tabular formats, like CSV and TSV 63 tldr.sh pseudo-examples shown by tool `tldr` 64 tools.txt descriptions of most tools, shown with the help message 65 utf8.go `utf-8` File: tu/plain.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 ) 8 9 // plainState is a custom type used in func plain and its helper funcs 10 type plainState int 11 12 const ( 13 plainNormal = plainState(0) 14 plainEscape = plainState(1) 15 plainANSI = plainState(2) 16 ) 17 18 // plain ignores all ANSI-style sequences 19 func plain(w *bufio.Writer, r io.Reader) error { 20 state := plainNormal 21 var buf [bufferSize]byte 22 23 for { 24 n, err := r.Read(buf[:]) 25 26 if n < 1 { 27 if state == plainEscape { 28 w.WriteByte('\x1b') 29 } 30 31 if err == io.EOF { 32 return nil 33 } 34 return err 35 } 36 37 chunk := buf[:n] 38 39 // special-handle chunks where there can't be any ANSI-sequences: 40 // bytes.IndexByte is a really quick check, and when it can't find 41 // escape-bytes, whole chunks can be bulk-copied into the output 42 if state == plainNormal { 43 if i := bytes.IndexByte(chunk, '\x1b'); i < 0 { 44 w.Write(chunk) 45 continue 46 } 47 } 48 49 for len(chunk) > 0 { 50 switch state { 51 case plainNormal: 52 chunk, state = plainHandleNormal(w, chunk) 53 case plainEscape: 54 chunk, state = plainHandleEscape(w, chunk) 55 case plainANSI: 56 chunk, state = plainHandleANSI(chunk) 57 } 58 } 59 } 60 } 61 62 // plainHandleNormal is used by func plain 63 func plainHandleNormal(w *bufio.Writer, chunk []byte) ([]byte, plainState) { 64 for len(chunk) > 0 { 65 b := chunk[0] 66 chunk = chunk[1:] 67 68 if b == '\x1b' { 69 return chunk, plainEscape 70 } else { 71 w.WriteByte(b) 72 } 73 } 74 75 return nil, plainNormal 76 } 77 78 // plainHandleEscape is used by func plain 79 func plainHandleEscape(w *bufio.Writer, chunk []byte) ([]byte, plainState) { 80 if len(chunk) > 0 { 81 b := chunk[0] 82 chunk = chunk[1:] 83 84 if b == '[' { 85 return chunk, plainANSI 86 } else { 87 w.WriteByte('\x1b') 88 w.WriteByte(b) 89 return chunk, plainNormal 90 } 91 } 92 93 return nil, plainEscape 94 } 95 96 // plainHandleANSI is used by func plain 97 func plainHandleANSI(chunk []byte) ([]byte, plainState) { 98 for len(chunk) > 0 { 99 b := chunk[0] 100 chunk = chunk[1:] 101 102 // turn lowercase ASCII letters into uppercase ones 103 b &= ^byte(32) 104 105 if 'A' <= b && b <= 'Z' { 106 return chunk, plainNormal 107 } 108 } 109 110 return nil, plainANSI 111 } File: tu/running.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "strconv" 9 "strings" 10 ) 11 12 type fallbackIntTool struct { 13 // tool is the func to run 14 tool func(w *bufio.Writer, r io.Reader, n int) error 15 16 // n is the default int argument to use, when none was given explicitly 17 n int 18 } 19 20 func (fit fallbackIntTool) run(w *bufio.Writer, r io.Reader, args []string) error { 21 switch len(args) { 22 case 0: 23 return fit.tool(w, r, fit.n) 24 25 case 1: 26 n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``)) 27 if err != nil { 28 const fs = `expected an integer argument, but was given %s` 29 return fmt.Errorf(fs, args[0]) 30 } 31 return fit.tool(w, r, n) 32 33 case 2: 34 return handleNamedInput(args[1], r, func(r io.Reader) error { 35 n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``)) 36 if err != nil { 37 const fs = `expected an integer argument, but was given %s` 38 return fmt.Errorf(fs, args[0]) 39 } 40 return fit.tool(w, r, n) 41 }) 42 43 default: 44 const fs = `expected at most 1 argument, but was given %d` 45 return fmt.Errorf(fs, len(args)) 46 } 47 } 48 49 // toolNameAliases translates synonyms into proper keys in table name2tool; 50 // unit-tests force it to have self-aliases, as well as entries for all tools 51 var toolNameAliases = map[string]string{ 52 `after`: `after`, 53 `afterlast`: `afterlast`, 54 `pastlast`: `afterlast`, 55 `aliases`: `aliases`, 56 `base64`: `base64`, 57 `before`: `before`, 58 `beforelast`: `beforelast`, 59 `begin`: `begin`, 60 `begincsv`: `begincsv`, 61 `begintsv`: `begintsv`, 62 `bigfiles`: `bigfiles`, 63 `blow`: `blow`, 64 `expand`: `blow`, 65 `book`: `book`, 66 `br`: `breathe`, 67 `breathe`: `breathe`, 68 `bytes`: `bytes`, 69 `cat`: `bytes`, 70 `choplf`: `choplf`, 71 `common`: `common`, 72 `compose`: `compose`, 73 `intersection`: `common`, 74 `coby`: `countbytes`, 75 `countbytes`: `countbytes`, 76 `crlf`: `crlf`, 77 `doslines`: `crlf`, 78 `windowslines`: `crlf`, 79 `winlines`: `crlf`, 80 `csv`: `csv`, 81 `csv2tsv`: `csv2tsv`, 82 `csvtotsv`: `csv2tsv`, 83 `datauri`: `datauri`, 84 `datauris`: `datauri`, 85 `debase64`: `debase64`, 86 `unbase64`: `debase64`, 87 `dedent`: `dedent`, 88 `dedup`: `dedup`, 89 `deduplicate`: `dedup`, 90 `unique`: `dedup`, 91 `drop`: `drop`, 92 `dropall`: `drop`, 93 `dropend`: `dropend`, 94 `dropsuffix`: `dropend`, 95 `dropsuffixes`: `dropend`, 96 `dropstart`: `dropstart`, 97 `dropprefix`: `dropstart`, 98 `dropprefixes`: `dropstart`, 99 `droptabs`: `droptabs`, 100 `each`: `each`, 101 `emptyfiles`: `emptyfiles`, 102 `emptyfolders`: `emptyfolders`, 103 `end`: `end`, 104 `endcsv`: `endcsv`, 105 `endtsv`: `endtsv`, 106 `examples`: `examples`, 107 `allfiles`: `files`, 108 `files`: `files`, 109 `first`: `first`, 110 `limit`: `first`, 111 `allfolders`: `folders`, 112 `folders`: `folders`, 113 `calc`: `frac`, 114 `frac`: `frac`, 115 `fracal`: `frac`, 116 `fracalc`: `frac`, 117 `glue`: `glue`, 118 `gbs`: `groupbysize`, 119 `groupbysize`: `groupbysize`, 120 `gz`: `gzip`, 121 `gzip`: `gzip`, 122 `help`: `help`, 123 `hex`: `hex`, 124 `id3image`: `id3pic`, 125 `id3img`: `id3pic`, 126 `id3pic`: `id3pic`, 127 `id3pict`: `id3pic`, 128 `id3picture`: `id3pic`, 129 `mp3image`: `id3pic`, 130 `mp3img`: `id3pic`, 131 `mp3pic`: `id3pic`, 132 `mp3pict`: `id3pic`, 133 `mp3picture`: `id3pic`, 134 `indent`: `indent`, 135 `join`: `join`, 136 `j0`: `json0`, 137 `jl`: `jsonl`, 138 `json0`: `json0`, 139 `jsonl`: `jsonl`, 140 `jsonlines`: `jsonl`, 141 `junk`: `junk`, 142 `last`: `last`, 143 `lines`: `lines`, 144 `unixify`: `lines`, 145 `lineup`: `lineup`, 146 `hyperlinks`: `links`, 147 `links`: `links`, 148 `countloc`: `loc`, 149 `loc`: `loc`, 150 `lower`: `lower`, 151 `lowercase`: `lower`, 152 `matchpar`: `matchpara`, 153 `matchpara`: `matchpara`, 154 `matchparagraph`: `matchpara`, 155 `mpar`: `matchpara`, 156 `mpara`: `matchpara`, 157 `paramatch`: `matchpara`, 158 `parmatch`: `matchpara`, 159 `pmatch`: `matchpara`, 160 `md5`: `md5`, 161 `md5hex`: `md5`, 162 `md5sum`: `md5`, 163 `missing`: `missing`, 164 `mumble`: `mumble`, 165 `n`: `n`, 166 `droppun`: `nopun`, 167 `droppunc`: `nopun`, 168 `droppunct`: `nopun`, 169 `nopun`: `nopun`, 170 `nopunc`: `nopun`, 171 `nopunct`: `nopun`, 172 `now`: `now`, 173 `nil`: `null`, 174 `nothing`: `null`, 175 `null`: `null`, 176 `numbers`: `numbers`, 177 `open`: `open`, 178 `destyle`: `plain`, 179 `plain`: `plain`, 180 `plaintext`: `plain`, 181 `unstyle`: `plain`, 182 `primes`: `primes`, 183 `reflow`: `reflow`, 184 `reprose`: `reflow`, 185 `runes`: `runes`, 186 `sbs`: `sbs`, 187 `sidebyside`: `sbs`, 188 `sha1`: `sha1`, 189 `sha1hex`: `sha1`, 190 `sha1sum`: `sha1`, 191 `sha256`: `sha256`, 192 `sha256hex`: `sha256`, 193 `sha256sum`: `sha256`, 194 `sha512`: `sha512`, 195 `sha512hex`: `sha512`, 196 `sha512sum`: `sha512`, 197 `showit`: `si`, 198 `si`: `si`, 199 `since`: `since`, 200 `sincelast`: `sincelast`, 201 `butfirst`: `skip`, 202 `skip`: `skip`, 203 `butlast`: `skiplast`, 204 `skipfirst`: `skip`, 205 `skiplast`: `skiplast`, 206 `skipempty`: `skipempty`, 207 `smallfiles`: `smallfiles`, 208 `squeeze`: `squeeze`, 209 `ssv`: `ssv`, 210 `fields2tsv`: `ssv2tsv`, 211 `ssv2tsv`: `ssv2tsv`, 212 `fieldstotsv`: `ssv2tsv`, 213 `ssvtotsv`: `ssv2tsv`, 214 `stomp`: `stomp`, 215 `strings`: `strings`, 216 `symbol`: `symbols`, 217 `symbols`: `symbols`, 218 `tally`: `tally`, 219 `tldr`: `tldr`, 220 `today`: `today`, 221 `topfiles`: `topfiles`, 222 `topfolders`: `topfolders`, 223 `strip`: `trim`, 224 `stripend`: `trimtrail`, 225 `stripright`: `trimtrail`, 226 `striptrail`: `trimtrail`, 227 `striptrails`: `trimtrail`, 228 `trim`: `trim`, 229 `trimend`: `trimtrail`, 230 `trimright`: `trimtrail`, 231 `trimtrail`: `trimtrail`, 232 `trimtrails`: `trimtrail`, 233 `trunc`: `truncate`, 234 `truncate`: `truncate`, 235 `tsv`: `tsv`, 236 `debz`: `unbzip`, 237 `debz2`: `unbzip`, 238 `debzip`: `unbzip`, 239 `debzip2`: `unbzip`, 240 `unbz`: `unbzip`, 241 `unbz2`: `unbzip`, 242 `unbzip`: `unbzip`, 243 `unbzip2`: `unbzip`, 244 `degz`: `ungzip`, 245 `degzip`: `ungzip`, 246 `ungz`: `ungzip`, 247 `ungzip`: `ungzip`, 248 `dehex`: `unhex`, 249 `unhex`: `unhex`, 250 `until`: `until`, 251 `upto`: `until`, 252 `untillast`: `untillast`, 253 `uptolast`: `untillast`, 254 `uri`: `uriencode`, 255 `uriencode`: `uriencode`, 256 `utf8`: `utf8`, 257 `vulgarize`: `vulgarize`, 258 `with`: `with`, 259 `ymd`: `ymd`, 260 } 261 262 // name2tool turns canonical names into callable funcs; table toolNameAliases 263 // adapts many dashless lowercased names into keys/names for this table 264 var name2tool = map[string]any{ 265 `after`: after, 266 `afterlast`: afterLast, 267 `aliases`: aliases, 268 `base64`: toBase64, 269 `before`: before, 270 `beforelast`: beforeLast, 271 `begin`: begin, 272 `begincsv`: beginCSV, 273 `begintsv`: beginTSV, 274 `bigfiles`: bigFiles, 275 `blow`: fallbackIntTool{blowTabs, 4}, 276 `book`: book, 277 `breathe`: fallbackIntTool{breathe, 5}, 278 `bytes`: bytesTool, 279 `choplf`: chopLF, 280 `common`: common, 281 `compose`: nil, 282 `countbytes`: coby, 283 `crlf`: crlf, 284 `csv`: csv2lines, 285 `csv2tsv`: csv2tsv, 286 `datauri`: dataURI, 287 `debase64`: debase64, 288 `dedent`: dedent, 289 `dedup`: dedup, 290 `drop`: drop, 291 `dropend`: dropEnd, 292 `dropstart`: dropStart, 293 `droptabs`: dropTabs, 294 `each`: nil, 295 `emptyfiles`: emptyFiles, 296 `emptyfolders`: emptyFolders, 297 `end`: end, 298 `endcsv`: endCSV, 299 `endtsv`: endTSV, 300 `examples`: showExamples, 301 `files`: allFiles, 302 `first`: fallbackIntTool{first, 1}, 303 `folders`: allFolders, 304 `frac`: fractions, 305 `glue`: glue, 306 `groupbysize`: groupByFileSize, 307 `gzip`: gzipBytes, 308 `help`: showHelp, 309 `hex`: toHex, 310 `id3pic`: id3Picture, 311 `indent`: indent, 312 `join`: join, 313 `json0`: json0, 314 `jsonl`: jsonl, 315 `junk`: fallbackIntTool{junk, 1_024}, 316 `last`: fallbackIntTool{last, 1}, 317 `lines`: lines, 318 `lineup`: fallbackIntTool{lineUp, 0}, 319 `links`: links, 320 `loc`: locTool, 321 `lower`: lower, 322 `matchpara`: matchParagraphsFold, 323 `md5`: md5Hex, 324 `missing`: missing, 325 `mumble`: nil, 326 `n`: fallbackIntTool{numberLines, 1}, 327 `nopun`: dropPunctuation, 328 `now`: now, 329 `null`: null, 330 `numbers`: numbers, 331 `open`: openTool, 332 `plain`: plain, 333 `primes`: fallbackIntTool{primes, 1_000_000}, 334 `reflow`: reflow, 335 `runes`: runes, 336 `sbs`: fallbackIntTool{sbs, 0}, 337 `sha1`: sha1Hex, 338 `sha256`: sha256Hex, 339 `sha512`: sha512Hex, 340 `si`: si, 341 `since`: since, 342 `sincelast`: sinceLast, 343 `skip`: skip, 344 `skipempty`: skipEmpty, 345 `skiplast`: fallbackIntTool{skipLast, 1}, 346 `smallfiles`: smallFiles, 347 `squeeze`: squeeze, 348 `ssv`: ssv2lines, 349 `ssv2tsv`: ssv2tsv, 350 `stomp`: stomp, 351 `strings`: stringsTool, 352 `symbols`: showSymbols, 353 `tally`: tally, 354 `today`: today, 355 `tldr`: showTLDR, 356 `topfiles`: topFiles, 357 `topfolders`: topFolders, 358 `trim`: trimSpaces, 359 `trimtrail`: trimTrail, 360 `truncate`: truncateRunes, 361 `tsv`: tsv2lines, 362 `unbzip`: debzip2, 363 `ungzip`: degzip, 364 `unhex`: unHex, 365 `until`: until, 366 `untillast`: untilLast, 367 `uriencode`: uriEncode, 368 `utf8`: toUTF8, 369 `vulgarize`: vulgarize, 370 `with`: nil, 371 `ymd`: ymd, 372 } 373 374 func addMetaTools() { 375 // enable special tools which circularly-refer to the tool-lookup table; 376 // the compiler complains if these are setup in the lookup-table itself 377 name2tool[`compose`] = compose 378 name2tool[`each`] = each 379 name2tool[`mumble`] = mumble 380 name2tool[`with`] = runWith 381 } 382 383 func each(w *bufio.Writer, r io.Reader, args []string) error { 384 if len(args) < 1 { 385 return errors.New(`expected at least 1 arg, but was given none`) 386 } 387 388 tool := args[0] 389 pars := make([]string, 0, len(args)) 390 pars = append(pars, args[1:]...) 391 return loopLinesString(r, func(line string) error { 392 return run(w, r, tool, append(pars, line)) 393 }) 394 } 395 396 func mumble(w *bufio.Writer, r io.Reader, args []string) error { 397 if len(args) < 1 { 398 return errors.New(`expected at least 1 arg, but was given none`) 399 } 400 401 for i, s := range args { 402 if i > 0 { 403 w.WriteByte(' ') 404 } 405 w.WriteString(s) 406 } 407 w.WriteByte('\t') 408 return run(w, r, args[0], args[1:]) 409 } 410 411 // run handles lookup and dispatch for all tools in this app 412 func run(w io.Writer, r io.Reader, tool string, args []string) error { 413 key := strings.TrimSpace(tool) 414 415 // handle custom aliases for the `compose` tool 416 switch key { 417 case `-`, `--`: 418 bw := bufio.NewWriterSize(w, bufferSize) 419 defer bw.Flush() 420 return compose(bw, r, args) 421 } 422 423 key = strings.ToLower(key) 424 key = strings.ReplaceAll(key, `-`, ``) 425 key = strings.ReplaceAll(key, `_`, ``) 426 if alias, ok := toolNameAliases[key]; ok { 427 key = alias 428 } 429 430 if key == `` { 431 return errors.New(`no tool-name given`) 432 } 433 434 fn, ok := name2tool[key] 435 if !ok { 436 return fmt.Errorf(`no tool named %q`, tool) 437 } 438 439 bw := bufio.NewWriterSize(w, bufferSize) 440 defer bw.Flush() 441 442 switch len(args) { 443 case 1: 444 switch fn.(type) { 445 case 446 func(line []byte) []byte, 447 func(w *bufio.Writer, r io.Reader) error, 448 func(w *bufio.Writer, line []byte) error, 449 func(w *bufio.Writer, line string) error: 450 return handleFile(args[0], func(r io.Reader) error { 451 return dispatchFunc(bw, r, fn, nil) 452 }) 453 } 454 455 case 2: 456 switch fn.(type) { 457 case 458 func(line []byte, arg []byte) []byte, 459 func(line []byte, n int) []byte, 460 func(w *bufio.Writer, r io.Reader, n int) error: 461 path := args[len(args)-1] 462 rest := args[:len(args)-1] 463 return handleFile(path, func(r io.Reader) error { 464 return dispatchFunc(bw, r, fn, rest) 465 }) 466 } 467 } 468 469 err := dispatchFunc(bw, r, fn, args) 470 if err == errNoMoreOutput { 471 // deliberately quit the app successfully and right away 472 return nil 473 } 474 475 if err != nil { 476 return fmt.Errorf(`%s: %w`, tool, err) 477 } 478 return nil 479 } 480 481 // runWith implements tool `with` 482 func runWith(w *bufio.Writer, r io.Reader, args []string) error { 483 if len(args) < 2 { 484 return fmt.Errorf(`expected at least 2 args, but was given %d`, len(args)) 485 } 486 487 path := args[0] 488 tool := args[1] 489 args = args[2:] 490 491 return handleFile(path, func(r io.Reader) error { 492 return run(w, r, tool, args) 493 }) 494 } 495 496 // unsupportedFuncType is a custom error type which enables unit-tests to 497 // automatically check if all entries in the func-dispatch table are callable 498 type unsupportedFuncType struct { 499 fn any 500 } 501 502 // Error implements the error interface 503 func (uft unsupportedFuncType) Error() string { 504 return fmt.Sprintf(`unsupported func type %T`, uft.fn) 505 } 506 507 // dispatchFunc is used by func run, making the latter more readable 508 func dispatchFunc(w *bufio.Writer, r io.Reader, fn any, args []string) error { 509 switch tool := fn.(type) { 510 case func(line []byte) []byte: 511 return loopLines(r, func(line []byte) error { 512 w.Write(tool(line)) 513 return endLine(w) 514 }) 515 516 case func(line string) string: 517 return loopLinesString(r, func(line string) error { 518 w.WriteString(tool(line)) 519 return endLine(w) 520 }) 521 522 case func(line []byte, arg []byte) []byte: 523 if len(args) != 1 { 524 return fmt.Errorf(`expected 1 arg, but was given %d`, len(args)) 525 } 526 x := []byte(args[0]) 527 return loopLines(r, func(line []byte) error { 528 w.Write(tool(line, x)) 529 return endLine(w) 530 }) 531 532 case func(line []byte, n int) []byte: 533 n, err := demandIntegerArg(args) 534 if err != nil { 535 return err 536 } 537 return loopLines(r, func(line []byte) error { 538 w.Write(tool(line, n)) 539 return endLine(w) 540 }) 541 542 case func(w *bufio.Writer) error: 543 return tool(w) 544 545 case func(w *bufio.Writer, args []string) error: 546 return tool(w, args) 547 548 case func(w *bufio.Writer, r io.Reader) error: 549 return tool(w, r) 550 551 case func(w *bufio.Writer, r io.Reader, args []string) error: 552 return tool(w, r, args) 553 554 case func(w *bufio.Writer, line []byte) error: 555 if len(args) != 0 { 556 return fmt.Errorf(`expected no args, but was given %d`, len(args)) 557 } 558 return loopLines(r, func(line []byte) error { 559 return tool(w, line) 560 }) 561 562 case func(w *bufio.Writer, line string) error: 563 if len(args) != 0 { 564 return fmt.Errorf(`expected no args, but was given %d`, len(args)) 565 } 566 return loopLinesString(r, func(line string) error { 567 return tool(w, line) 568 }) 569 570 case func(w *bufio.Writer, r io.Reader, n int) error: 571 n, err := demandIntegerArg(args) 572 if err != nil { 573 return err 574 } 575 return tool(w, r, n) 576 577 case fallbackIntTool: 578 return tool.run(w, r, args) 579 580 default: 581 return unsupportedFuncType{tool} 582 } 583 } 584 585 // demandIntegerArg helps various tools handle a single non-optional int arg 586 func demandIntegerArg(args []string) (int, error) { 587 if len(args) != 1 { 588 return 0, fmt.Errorf(`expected 1 argument, but was given %d`, len(args)) 589 } 590 591 n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``)) 592 if err != nil { 593 const fs = `expected 1 integer argument, but was given %s` 594 return n, fmt.Errorf(fs, args[0]) 595 } 596 return n, nil 597 } 598 599 // optionalIntegerArg helps various tools handle a single optional int arg 600 // func optionalIntegerArg(args []string, fallback int) (int, error) { 601 // if len(args) == 0 { 602 // return fallback, nil 603 // } 604 605 // if len(args) > 1 { 606 // const fs = `expected at most 1 argument, but was given %d` 607 // return fallback, fmt.Errorf(fs, len(args)) 608 // } 609 610 // n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``)) 611 // if err != nil { 612 // const fs = `expected at most 1 integer argument, but was given %s` 613 // return n, fmt.Errorf(fs, args[0]) 614 // } 615 // return n, nil 616 // } 617 618 // optionalIntegerArg helps various tools handle a single optional int arg 619 func optionalStringArg(args []string, fallback string) (string, error) { 620 if len(args) == 0 { 621 return fallback, nil 622 } 623 624 if len(args) > 1 { 625 const fs = `expected at most 1 argument, but was given %d` 626 return fallback, fmt.Errorf(fs, len(args)) 627 } 628 629 return fallback, nil 630 } 631 632 // compose runs a chain of commands asynchronously, but still keeping their 633 // implied I/O order 634 func compose(w *bufio.Writer, r io.Reader, rest []string) error { 635 cmds := splitSliceNonEmpty(rest, `--`) 636 return composeAsyncRec(w, r, cmds) 637 } 638 639 // composeAsyncRec handles the recursion for func composeAsync; the code to 640 // merge the error-channels looks slightly `simplifiable`, but trying to do 641 // so can lead to ugly concurrency bugs; things seem to work, so keep as is 642 func composeAsyncRec(w io.Writer, r io.Reader, cmds [][]string) error { 643 if len(cmds) == 0 { 644 return nil 645 } 646 647 // check, even if func splitSliceNonEmpty is supposed to prevent this 648 if len(cmds[0]) == 0 { 649 return errors.New(`internal error: unexpected empty command-slice`) 650 } 651 tool := cmds[0][0] 652 args := cmds[0][1:] 653 654 // handle the last subcommand/tool in the chain 655 if len(cmds) == 1 { 656 return run(w, r, tool, args) 657 } 658 659 // handle the steps along the way, gathering a single error result 660 errch := make(chan error) 661 defer close(errch) 662 663 go func() { 664 nextpipe, curpipe := io.Pipe() 665 666 curerrch := make(chan error) 667 defer close(curerrch) 668 nexterrch := make(chan error) 669 defer close(nexterrch) 670 671 // start the current task asynchronously 672 go func() { 673 // directly using io.Pipe can lead to an astonishing number of 674 // empty/tiny byte-slices being passed around channels, which 675 // slows things down considerably when dealing with many data 676 w := bufio.NewWriterSize(curpipe, bufferSize) 677 678 // ensure clean-up in case current tool panics 679 defer curpipe.Close() 680 defer w.Flush() 681 682 // make sequence of all steps explicit, to ensure things are 683 // happening in the correct order 684 err := run(w, r, tool, args) 685 w.Flush() 686 curpipe.Close() 687 curerrch <- err 688 }() 689 690 // start all later tasks asynchronously, by way of recursion 691 go func() { 692 // ensure clean-up in case later tools panic 693 defer nextpipe.Close() 694 695 // make sequence of all steps explicit 696 err := composeAsyncRec(w, nextpipe, cmds[1:]) 697 nextpipe.Close() 698 nexterrch <- err 699 }() 700 701 // wait for completion of all tasks, in any order: this is done 702 // by waiting for 2 tasks, since the latter of these handles all 703 // later tasks, by way of recursion 704 705 select { 706 case err := <-curerrch: 707 if adaptWriteError(err) != nil { 708 // wait for the later tasks to end, ignoring their error 709 <-nexterrch 710 711 // return error from the current task 712 errch <- err 713 return 714 } 715 716 // wait for later tasks to end, ignoring their error 717 errch <- <-nexterrch 718 return 719 720 case err := <-nexterrch: 721 // try to explicitly end the current task sooner; multiple 722 // closures of io.Pipe `values` are allowed: their Close 723 // funcs do nothing when called after the first time 724 curpipe.Close() 725 726 if adaptWriteError(err) != nil { 727 // wait for current task to end, ignoring its error 728 <-curerrch 729 730 // return error from later tasks 731 errch <- err 732 return 733 } 734 735 // wait for current task to end 736 errch <- <-curerrch 737 return 738 } 739 }() 740 741 // wait for a definitive error/result from the async tasks 742 return <-errch 743 } File: tu/running_test.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "io" 8 "testing" 9 ) 10 11 func TestToolAliases(t *testing.T) { 12 for k, v := range toolNameAliases { 13 t.Run(k, func(t *testing.T) { 14 if !isValidTableString(k) { 15 t.Fatalf(`unexpected string %q in table`, k) 16 return 17 } 18 19 if !isValidTableString(v) { 20 t.Fatalf(`unexpected string %q in table`, v) 21 return 22 } 23 24 if _, ok := name2tool[v]; !ok { 25 t.Fatalf(`alias %q doesn't lead to a func`, k) 26 return 27 } 28 }) 29 } 30 31 for k, v := range toolNameAliases { 32 t.Run(k, func(t *testing.T) { 33 if !isValidTableString(k) { 34 t.Fatalf(`unexpected string %q in table`, k) 35 return 36 } 37 38 if _, ok := toolNameAliases[v]; !ok { 39 t.Fatalf(`no self-alias for %q`, v) 40 return 41 } 42 }) 43 } 44 } 45 46 func isValidTableString(s string) bool { 47 for _, r := range s { 48 if 'a' <= r && r <= 'z' { 49 continue 50 } 51 if '0' <= r && r <= '9' { 52 continue 53 } 54 55 return false 56 } 57 58 return true 59 } 60 61 func TestToolTable(t *testing.T) { 62 addMetaTools() 63 64 for name, tool := range name2tool { 65 // avoid some of the more `interactive` tools, and tools which 66 // visibly open other apps/files 67 switch name { 68 case `si`: 69 continue 70 } 71 72 t.Run(name, func(t *testing.T) { 73 w := bufio.NewWriter(io.Discard) 74 err := dispatchFunc(w, bytes.NewReader(nil), tool, nil) 75 if errors.As(err, &unsupportedFuncType{}) { 76 t.Fatal(err.Error()) 77 return 78 } 79 80 if _, ok := toolNameAliases[name]; !ok { 81 t.Fatalf(`no self-alias for %q`, name) 82 return 83 } 84 }) 85 } 86 } File: tu/sbs.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "io" 8 "math" 9 "strings" 10 "unicode/utf8" 11 ) 12 13 const ( 14 // sbsTabStop is the space-count used for tab-expansion 15 sbsTabStop = 4 16 17 // sbsSeparator is the string put between adjacent columns 18 sbsSeparator = ` █ ` 19 20 // sbsMaxAutoWidth is the output max-width allowed when auto-picking a 21 // column-count; chosen to fit very old monitors 22 sbsMaxAutoWidth = 79 23 ) 24 25 // book lays out input lines side-by-side like in a book 26 func book(w *bufio.Writer, r io.Reader, height int) error { 27 if height < 2 { 28 return errors.New(`page-height can't be less than 2`) 29 } 30 31 c := 0 32 leftw := 0 33 rightw := 0 34 35 step := height - 1 36 var left, right [][]byte 37 38 err := loopLines(r, func(line []byte) error { 39 i := c 40 c = (c + 1) % (2 * step) 41 42 // expand all tabs, to avoid any later ambiguity about alignments 43 if bytes.IndexByte(line, '\t') >= 0 { 44 var exp bytes.Buffer 45 sbsExpand(line, sbsTabStop, &exp) 46 line = exp.Bytes() 47 } else { 48 line = bytes.Clone(line) 49 } 50 51 n := unstyledWidth(line) 52 53 if i < step { 54 if leftw < n { 55 leftw = n 56 } 57 left = append(left, line) 58 return nil 59 } 60 61 if rightw < n { 62 rightw = n 63 } 64 right = append(right, line) 65 return nil 66 }) 67 68 if err != nil { 69 return err 70 } 71 72 if len(right) == 0 { 73 for _, s := range left { 74 w.Write(s) 75 if err := endLine(w); err != nil { 76 return err 77 } 78 } 79 return nil 80 } 81 82 const sep = ` █ ` 83 dots := strings.Repeat(`·`, leftw+utf8.RuneCountInString(sep)+rightw) 84 85 n := len(left) 86 if n < len(right) { 87 n = len(right) 88 } 89 90 for i := 0; i < n; i++ { 91 if i%step == 0 && i > 0 { 92 w.WriteString(dots) 93 endLine(w) 94 } 95 96 x := safeIndex(left, i) 97 w.Write(x) 98 writeSpaces(w, leftw-unstyledWidth(x)) 99 w.WriteString(sep) 100 w.Write(safeIndex(right, i)) 101 102 err := endLine(w) 103 if err != nil { 104 return err 105 } 106 } 107 108 return nil 109 } 110 111 func safeIndex(items [][]byte, i int) []byte { 112 if i < len(items) { 113 return items[i] 114 } 115 return nil 116 } 117 118 func sbs(w *bufio.Writer, r io.Reader, ncols int) error { 119 var lines [][]byte 120 err := loopLines(r, func(line []byte) error { 121 // expand all tabs, to avoid any later ambiguity about alignments 122 if bytes.IndexByte(line, '\t') >= 0 { 123 var exp bytes.Buffer 124 sbsExpand(line, sbsTabStop, &exp) 125 lines = append(lines, exp.Bytes()) 126 return nil 127 } 128 129 lines = append(lines, bytes.Clone(line)) 130 return nil 131 }) 132 133 if err != nil { 134 return err 135 } 136 137 // choose a default number of columns, if not given a positive one 138 if ncols < 1 { 139 ncols = sbsChooseNumColumns(lines) 140 } 141 return sideBySide(w, lines, ncols) 142 } 143 144 // handleLines handles the use-case of showing/rearranging lines from a 145 // single input source (presumably standard input) into several columns 146 func sideBySide(w *bufio.Writer, lines [][]byte, ncols int) error { 147 if ncols < 1 { 148 return nil 149 } 150 151 if ncols == 1 { 152 for _, s := range lines { 153 w.Write(s) 154 err := w.WriteByte('\n') 155 if err != nil { 156 // assume error probably results from a closed stdout 157 // pipe, so quit the app right away without complaining 158 return err 159 } 160 } 161 return nil 162 } 163 164 // nothing to show, so don't even bother 165 if len(lines) == 0 { 166 return nil 167 } 168 169 cols, height := splitLines(lines, ncols) 170 widths := make([]int, 0, len(cols)) 171 for _, c := range cols { 172 // find the max width of all lines of the current column 173 maxw := 0 174 for _, v := range c { 175 w := width(v) 176 if w > maxw { 177 maxw = w 178 } 179 } 180 181 widths = append(widths, maxw) 182 } 183 184 // endSep is right-trimmed to avoid unneeded trailing spaces on output 185 // lines whose last column is an empty/missing input line 186 endSep := strings.TrimRight(sbsSeparator, ` `) 187 188 // show columns side by side 189 for r := 0; r < height; r++ { 190 for c := 0; c < len(cols); c++ { 191 badr := r >= len(cols[c]) 192 193 // clearly separate columns visually 194 if c > 0 { 195 if c == len(cols)-1 && (badr || len(cols[c][r]) == 0) { 196 // avoid unneeded trailing spaces 197 w.WriteString(endSep) 198 } else { 199 w.WriteString(sbsSeparator) 200 } 201 } 202 203 if badr { 204 // exceeding items for this (last) column 205 continue 206 } 207 208 // pad all columns, except the last 209 width := 0 210 if c < len(cols)-1 { 211 width = widths[c] 212 } 213 214 // emit maybe-padded column 215 writeItem(w, cols[c][r], width) 216 } 217 218 // end the line 219 err := w.WriteByte('\n') 220 if err != nil { 221 // probably a pipe was closed 222 return nil 223 } 224 } 225 226 return nil 227 } 228 229 // sbsExpand replaces all tabs with correctly-padded tabstops, turning all tabs 230 // each into 1 or more spaces, as appropriate 231 func sbsExpand(s []byte, tabstop int, sb *bytes.Buffer) { 232 sb.Reset() 233 numrunes := 0 234 235 for _, b := range s { 236 switch b { 237 case '\t': 238 numspaces := tabstop - numrunes%tabstop 239 for i := 0; i < numspaces; i++ { 240 sb.WriteRune(' ') 241 } 242 numrunes += numspaces 243 244 default: 245 sb.WriteByte(b) 246 numrunes++ 247 } 248 } 249 } 250 251 // width calculates visually-correct string widths 252 func width(s []byte) int { 253 return utf8.RuneCount(s) - ansiLength(s) 254 } 255 256 // ansiLength calculates how many bytes ANSI-codes take in the string given: 257 // func width uses this to calculate visually-correct string widths 258 func ansiLength(s []byte) int { 259 n := 0 260 var prev byte 261 ansi := false 262 263 for _, r := range s { 264 if ansi { 265 n++ 266 } 267 268 if ansi && r == 'm' { 269 ansi = false 270 continue 271 } 272 273 if prev == '\x1b' && r == '[' { 274 n += 2 // count the 2-item starter-sequence `\x1b[` 275 ansi = true 276 } 277 prev = r 278 } 279 280 return n 281 } 282 283 // splitLines turns an array of lines into sub-arrays of lines, so they can 284 // be shown side by side later on 285 func splitLines(lines [][]byte, ncols int) (cols [][][]byte, maxheight int) { 286 n := ncols 287 hfrac := float64(len(lines)) / float64(n) 288 h := int(math.Ceil(hfrac)) 289 290 cols = make([][][]byte, 0, n) 291 for len(lines) > h { 292 cols = append(cols, lines[:h]) 293 lines = lines[h:] 294 } 295 if len(lines) != 0 { 296 cols = append(cols, lines) 297 } 298 return cols, h 299 } 300 301 // padWrite emits the string given, following it with spaces to fill the 302 // width given if string is shorter than that 303 func padWrite(w *bufio.Writer, s []byte, n int) { 304 w.Write(s) 305 writeSpaces(w, n-width(s)) 306 } 307 308 // writeItem emits the string given, followed by any padding needed, as well 309 // as ANSI-style clearing, again if needed 310 func writeItem(w *bufio.Writer, s []byte, width int) { 311 padWrite(w, s, width) 312 if needsStyleReset(s) { 313 w.WriteString("\x1b[0m") 314 } 315 } 316 317 func needsStyleReset(s []byte) bool { 318 return true && 319 bytes.Contains(s, []byte{'\x1b', '['}) && 320 !bytes.HasSuffix(s, []byte{'\x1b', '[', '0', 'm'}) 321 } 322 323 // sbsChooseNumColumns implements heuristics to auto-pick the number of columns 324 // to show: this func is used when the app is using data from standard-input 325 func sbsChooseNumColumns(lines [][]byte) int { 326 if len(lines) == 0 { 327 return 1 328 } 329 330 // sepw is the separator width 331 sepw := utf8.RuneCountInString(sbsSeparator) 332 333 // see if lines can even fit a single column 334 if !sbsColumnsCanFit(1, lines, sepw) { 335 return 1 336 } 337 338 // starting from the max possible columns which may fit, keep trying 339 // with 1 fewer column, until the columns fit 340 for ncols := int(sbsMaxAutoWidth / sepw); ncols > 1; ncols-- { 341 if sbsColumnsCanFit(ncols, lines, sepw) { 342 // success: found the most columns which fit 343 return ncols 344 } 345 } 346 347 // avoid multiple columns if some lines are too wide 348 return 1 349 } 350 351 // sbsColumnsCanFit checks whether the number of columns given would fit the 352 // display max-width constant 353 func sbsColumnsCanFit(ncols int, lines [][]byte, gap int) bool { 354 if ncols < 1 { 355 // avoid surprises when called with non-sense column counts 356 return true 357 } 358 359 // stack-allocate the backing-array behind slice maxw 360 var buf [sbsMaxAutoWidth / 2]int 361 maxw := buf[:0] 362 363 // find the column max-height, to chunk lines into columns 364 h := int(math.Ceil(float64(len(lines)) / float64(ncols))) 365 366 // find column max-width by looping over chunks of lines 367 for len(lines) >= h { 368 w := findMaxWidth(lines[:h]) 369 maxw = append(maxw, w) 370 lines = lines[h:] 371 } 372 373 // don't forget the last column 374 if len(lines) > 0 { 375 w := findMaxWidth(lines) 376 maxw = append(maxw, w) 377 } 378 379 // remember to add the gaps/separators between columns, along with 380 // all the individual column max-widths 381 w := (ncols - 1) * gap 382 for _, n := range maxw { 383 w += n 384 } 385 386 // do the columns fit? 387 return w <= sbsMaxAutoWidth 388 } 389 390 // findMaxWidth finds the max width in the slice given, ignoring ANSI codes 391 func findMaxWidth(lines [][]byte) int { 392 maxw := 0 393 for _, s := range lines { 394 w := width(s) 395 if w > maxw { 396 maxw = w 397 } 398 } 399 return maxw 400 } File: tu/si.go 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/base64" 7 "errors" 8 "fmt" 9 "io" 10 "net" 11 "strings" 12 ) 13 14 const ( 15 // beforeAudio starts HTML webpage with just an audio player 16 beforeAudio = `<!DOCTYPE html> 17 <html> 18 <head> 19 <meta charset="UTF-8"> 20 <link rel="icon" href="data:,"> 21 <title>wave sound</title> 22 <style> 23 body { margin: 2rem auto; width: 90vw; } 24 audio { margin: auto; width: 100%; } 25 </style> 26 </head> 27 <body> 28 <audio controls autofocus src="` 29 30 // beforeAutoplayAudio starts HTML webpage with just an audio player 31 // in autoplay mode 32 beforeAutoplayAudio = `<!DOCTYPE html> 33 <html> 34 <head> 35 <meta charset="UTF-8"> 36 <link rel="icon" href="data:,"> 37 <title>wave sound</title> 38 <style> 39 body { margin: 2rem auto; width: 90vw; } 40 audio { margin: auto; width: 100%; } 41 </style> 42 </head> 43 <body> 44 <audio controls autofocus autoplay src="` 45 46 // afterAudio ends HTML webpage with just an audio player 47 afterAudio = "\"></audio>\n</body>\n</html>\n" 48 49 // beforeBitmap starts HTML webpage with just an image 50 beforeBitmap = `<!DOCTYPE html> 51 <html> 52 <head> 53 <meta charset="UTF-8"> 54 <link rel="icon" href="data:,"> 55 <title>bitmap image</title> 56 <style> 57 body { margin: 0.5rem auto; width: 90vw; } 58 img { margin: auto; width: 100%; } 59 </style> 60 </head> 61 <body> 62 <img src="` 63 64 // afterBitmap ends HTML webpage with just an image 65 afterBitmap = "\"></img>\n</body>\n</html>\n" 66 ) 67 68 // si implements the `Show It` tool 69 func si(w *bufio.Writer, r io.Reader, names []string) error { 70 if len(names) > 0 { 71 return openTool(w, r, names) 72 } 73 74 for _, s := range names { 75 // handle data-URIs 76 if strings.HasPrefix(s, `data:`) && strings.Contains(s, `;base64,`) { 77 if err := popup(s); err != nil { 78 return err 79 } 80 81 if err := siHandleInput(strings.NewReader(s)); err != nil { 82 return err 83 } 84 85 continue 86 } 87 88 if err := popupName(s); err != nil { 89 return err 90 } 91 } 92 93 if len(names) == 0 { 94 return siHandleInput(r) 95 } 96 return nil 97 } 98 99 // siHandleInput specifically handles stdin and data-URIs 100 func siHandleInput(r io.Reader) error { 101 // before starting the single-request server, try to detect the MIME type 102 // by inspecting the first bytes of the stream and matching known filetype 103 // starting patterns 104 var buf [64]byte 105 n, err := r.Read(buf[:]) 106 if err != nil && err != io.EOF { 107 return err 108 } 109 start := buf[:n] 110 111 // handle data-URI-like inputs 112 if bytes.HasPrefix(start, []byte(`data:`)) { 113 if bytes.Contains(start, []byte(`;base64,`)) { 114 return siHandleDataURI(start, r) 115 } 116 } 117 118 // handle regular data, trying to auto-detect its MIME type using 119 // its first few bytes 120 mime, ok := detectMIME(start) 121 if !ok || len(mime) == 0 { 122 mime = `text/plain` 123 } 124 125 // remember to precede the partly-used reader with the starting bytes; 126 // give a negative/invalid filesize hint, since stream is single-use 127 const autoplay = true 128 return serveOnce(start, r, serveConfig{ 129 ContentType: mime, 130 ContentLength: -1, 131 Autoplay: autoplay, 132 }) 133 } 134 135 // siHandleDataURI handles data-URIs for func handleInput 136 func siHandleDataURI(start []byte, r io.Reader) error { 137 const autoplay = true 138 if !bytes.HasPrefix(start, []byte(`data:`)) { 139 return errors.New(`invalid data-URI`) 140 } 141 142 i := bytes.Index(start, []byte(`;base64,`)) 143 if i < 0 { 144 return errors.New(`invalid data-URI`) 145 } 146 147 // force browser to play wave and aiff sounds, instead of 148 // showing a useless download-file option 149 switch mime := string(start[len(`data:`):i]); mime { 150 case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`: 151 before := beforeAudio 152 if autoplay { 153 before = beforeAutoplayAudio 154 } 155 156 // surround URI-encoded audio data with a web page only having 157 // a media player in it: this is necessary for wave and aiff 158 // sounds, since web browsers may insist on a useless download 159 // option for those media types 160 r = io.MultiReader( 161 strings.NewReader(before), 162 bytes.NewReader(start), 163 r, 164 strings.NewReader(afterAudio), 165 ) 166 167 return serveOnce(nil, r, serveConfig{ 168 ContentType: `text/html; charset=UTF-8`, 169 ContentLength: -1, 170 Autoplay: autoplay, 171 }) 172 173 case `image/bmp`, `audio/x-bmp`: 174 // surround URI-encoded bitmap data with a web page only having 175 // an image element in it: this is necessary for bitmap pictures, 176 // since web browsers may insist on a useless download option for 177 // that media type 178 r = io.MultiReader( 179 strings.NewReader(beforeBitmap), 180 bytes.NewReader(start), 181 r, 182 strings.NewReader(afterBitmap), 183 ) 184 185 return serveOnce(nil, r, serveConfig{ 186 ContentType: `text/html; charset=UTF-8`, 187 ContentLength: -1, 188 Autoplay: autoplay, 189 }) 190 191 default: 192 start = start[i+len(`;base64,`):] 193 r = io.MultiReader(bytes.NewReader(start), r) 194 dec := base64.NewDecoder(base64.URLEncoding, r) 195 196 // give a negative/invalid filesize hint, since stream is single-use 197 return serveOnce(nil, dec, serveConfig{ 198 ContentType: mime, 199 ContentLength: -1, 200 Autoplay: autoplay, 201 }) 202 } 203 } 204 205 // serveConfig has all details func serveOnce needs 206 type serveConfig struct { 207 // ContentType is the MIME type of what's being served 208 ContentType string 209 210 // ContentLength is the byte-count of what's being served; negative 211 // values are ignored 212 ContentLength int 213 214 // Autoplay autoplays audio/video data from stdin 215 Autoplay bool 216 } 217 218 // serveOnce literally serves a single web request and no more 219 func serveOnce(start []byte, rest io.Reader, cfg serveConfig) error { 220 // pick a random port from the currently-available ones 221 srv, err := net.Listen(`tcp`, `127.0.0.1:0`) 222 if err != nil { 223 return err 224 } 225 defer srv.Close() 226 227 // open a new browser tab for that localhost port 228 err = popup(fmt.Sprintf(`http://%s`, srv.Addr().String())) 229 if err != nil { 230 return err 231 } 232 233 // accept first connection: no need for async as the server quits after 234 // its first response 235 conn, err := srv.Accept() 236 if err != nil { 237 return err 238 } 239 defer conn.Close() 240 241 respond(conn, start, rest, cfg) 242 return nil 243 } 244 245 // respond reads/ignores all request headers, and then replies with some 246 // content given, quitting immediately after 247 func respond(conn net.Conn, start []byte, rest io.Reader, cfg serveConfig) { 248 // maxbufsize is the max capacity the HTTP-protocol line-scanners are 249 // allowed to reach 250 const maxbufsize = 128 * 1024 251 252 sc := bufio.NewScanner(conn) 253 sc.Buffer(nil, maxbufsize) 254 for sc.Scan() && sc.Text() != `` { 255 // ignore all request headers 256 } 257 258 switch cfg.ContentType { 259 case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`: 260 // force browser to play wave and aiff sounds, instead of showing 261 // a useless download-file option; encode audio bytes as data-URI 262 // in an intermediate buffer 263 264 writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1) 265 // emit opening HTML right until <audio controls src=" 266 if cfg.Autoplay { 267 fmt.Fprint(conn, beforeAutoplayAudio) 268 } else { 269 fmt.Fprint(conn, beforeAudio) 270 } 271 // emit the data-URI 272 writeBase64(conn, cfg.ContentType, start, rest) 273 // emit closing HTML after data-URI audio 274 fmt.Fprint(conn, afterAudio) 275 return 276 277 case `image/bmp`, `image/x-bmp`: 278 // force browser to show bitmap pictures, instead of showing a 279 // useless download-file option; encode picture bytes as data-URI 280 // in an intermediate buffer 281 282 writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1) 283 // emit opening HTML right until <img src=" 284 fmt.Fprint(conn, beforeBitmap) 285 // emit the data-URI 286 writeBase64(conn, cfg.ContentType, start, rest) 287 // emit closing HTML after data-URI image 288 fmt.Fprint(conn, afterBitmap) 289 return 290 291 default: 292 writePreludeHTTP(conn, cfg.ContentType, cfg.ContentLength) 293 // send the starting bytes used to auto-detect the content-type 294 conn.Write(start) 295 // send rest of payload at light-speed 296 io.Copy(conn, rest) 297 } 298 } 299 300 func writePreludeHTTP(conn net.Conn, contentType string, contentLength int) { 301 // respond right after the first empty line, which always follows the 302 // request's headers 303 fmt.Fprint(conn, "HTTP/1.1 200 OK\r\n") 304 fmt.Fprintf(conn, "Content-Type: %s\r\n", contentType) 305 if contentLength > 0 { 306 fmt.Fprintf(conn, "Content-Length: %d\r\n", contentLength) 307 } 308 309 // prevent download-dialog or auto-download from the browser's part 310 fmt.Fprintf(conn, "Content-Disposition: inline\r\n") 311 // tell browser this is the last request 312 fmt.Fprint(conn, "Connection: close\r\n") 313 // payload starts right after an empty line 314 fmt.Fprint(conn, "\r\n") 315 } 316 317 func writeBase64(conn net.Conn, mimeType string, start []byte, rest io.Reader) { 318 // send the data-URI intro 319 fmt.Fprintf(conn, `data:%s;base64,`, mimeType) 320 enc := base64.NewEncoder(base64.StdEncoding, conn) 321 // base64-encode the starting bytes used to auto-detect the input type 322 enc.Write(start) 323 // base64-encode the rest of the input 324 io.Copy(enc, rest) 325 enc.Close() 326 } File: tu/strings.go 1 package main 2 3 import ( 4 "errors" 5 "math" 6 "os" 7 "strconv" 8 "strings" 9 ) 10 11 func countDecimals(s string) int { 12 if i := strings.LastIndexByte(s, '.'); i >= 0 { 13 return len(s) - 1 - i 14 } 15 return 0 16 } 17 18 // parseInt makes using func parseInt64 more convenient 19 func parseInt(s string) (int, error) { 20 n, err := parseInt64(s) 21 return int(n), err 22 } 23 24 // parseInt64 is basically a more flexible strconv.Atoi, since it supports 25 // the handy floating-point notation for large numbers, and it ignores 26 // underscores as well; when exposed to user-given args/input, it makes 27 // using this app a much more pleasant experience overall 28 func parseInt64(s string) (int64, error) { 29 f, err := strconv.ParseFloat(s, 64) 30 if err == nil && !math.IsNaN(f) && !math.IsInf(f, 0) { 31 return int64(f), nil 32 } 33 return strconv.ParseInt(s, 10, 64) 34 } 35 36 // parsePortNumber handles the details specific to parsing valid port numbers; 37 // this func refuses to accept 0 as a valid port number 38 func parsePortNumber(s string) (int, error) { 39 port, err := parseInt(s) 40 if err != nil { 41 return port, err 42 } 43 44 if port < 1 || port > 65_535 { 45 msg := strconv.Itoa(port) + ` is an invalid port number` 46 return port, errors.New(msg) 47 } 48 return port, nil 49 } 50 51 // splitSliceNonEmpty does what it says, ensuring no subslice in the result 52 // is empty; empty slices return empty results 53 func splitSliceNonEmpty(items []string, sep string) [][]string { 54 cur := items 55 var res [][]string 56 57 for len(cur) > 0 { 58 // skip all leading separators, also ensuring no empty subslices 59 // sneak thru the splitting happending below 60 for len(cur) > 0 && cur[0] == sep { 61 cur = cur[1:] 62 } 63 64 i := findNext(cur, sep) 65 // no more subslices, or the very last subslice follows 66 if i < 0 { 67 // don't forget trailing subslices, after the last separator 68 if len(cur) > 0 { 69 res = append(res, cur) 70 } 71 return res 72 } 73 74 // ignore empty subslices 75 if i == 0 { 76 continue 77 } 78 79 res = append(res, cur[:i]) 80 cur = cur[i+1:] 81 } 82 83 return res 84 } 85 86 func startsWith(s string, what []byte) bool { 87 if len(s) < len(what) { 88 return false 89 } 90 91 for i := 0; i < len(s); i++ { 92 if s[i] != what[i] { 93 return false 94 } 95 } 96 return true 97 } 98 99 // findNext finds a string in a string-slice, returning an invalid negative 100 // index on failure 101 func findNext(src []string, what string) int { 102 for i, s := range src { 103 if s == what { 104 return i 105 } 106 } 107 return -1 108 } 109 110 // unixSlashes ensures paths written to the output use unix-style slashes 111 // on all systems 112 func unixSlashes(path string) string { 113 if os.PathSeparator == '\\' { 114 return strings.ReplaceAll(path, `\`, `/`) 115 } 116 return path 117 } 118 119 // stringRing is a circular/ring-buffer for strings, where adding new items 120 // eventually starts to overwrite earlier-added ones 121 type stringRing struct { 122 items []string 123 next int 124 } 125 126 // newStringRing is the constructor for type stringRing; the capacity given 127 // to it can't be less than 1 128 func newStringRing(cap int) stringRing { 129 if cap < 1 { 130 cap = 1 131 } 132 return stringRing{make([]string, 0, cap), 0} 133 } 134 135 // Len is the current item-count in the ring, which can never exceed the 136 // max-capacity it was setup with 137 func (sr stringRing) Len() int { 138 return len(sr.items) 139 } 140 141 // Earliest gives you the earliest-added item still hanging in the ring 142 func (sr stringRing) Earliest() string { 143 if len(sr.items) > 0 { 144 return sr.items[sr.next%len(sr.items)] 145 } 146 return `` 147 } 148 149 // Bring used to be called `Put`, but the new name rhymes 150 func (sr *stringRing) Bring(s string) { 151 if len(sr.items) < cap(sr.items) { 152 sr.items = append(sr.items, s) 153 sr.next++ 154 return 155 } 156 157 i := sr.next % cap(sr.items) 158 sr.items[i] = s 159 sr.next = i + 1 160 } 161 162 // Halves is a simple way to handle items in order, by treating the slice 163 // as 2 separate sub-slices, each of which is ordered 164 func (sr *stringRing) Halves() (first, second []string) { 165 if sr.next > 0 { 166 return sr.items[sr.next:], sr.items[:sr.next] 167 } 168 return sr.items, nil 169 } File: tu/symbols.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "sort" 8 "strings" 9 ) 10 11 const ( 12 faces = `` + 13 `😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓` + 14 `😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣😤😥😦😧` + 15 `😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷🙁🙂🙃🙄` + 16 `🧐👶🤓🤐🤑🤒🤔🤕🤗🤠🤡🤢🤣🤤🤥🤧🤨🤩🤪🤫` + 17 `🤬🤭🤮🤯` 18 ) 19 20 // `crlf`: "\r\n", 21 // `lf`: "\n", 22 // `linefeed`: "\n", 23 // `tab`: "\t", 24 25 // names2symbols translates common names for common symbols 26 var names2symbols = map[string]string{ 27 `adash`: `-`, 28 `amp`: `&`, 29 `ampersand`: `&`, 30 `apostrophe`: `’`, 31 `ast`: `*`, 32 `asterisk`: `*`, 33 `backquote`: "`", 34 `backslash`: `\`, 35 `ball`: `●`, 36 `bang`: `!`, 37 `block`: `█`, 38 `bquote`: "`", 39 `bslash`: `\`, 40 `bullet`: `•`, 41 `caret`: `^`, 42 `colon`: `:`, 43 `comma`: `,`, 44 `commaspace`: `, `, 45 `cquote`: `”`, 46 `cdot`: `·`, 47 `circle`: `●`, 48 `cloud`: `☁️`, 49 `copyright`: `©`, 50 `cross`: `×`, 51 `dash`: `–`, 52 `dollar`: `$`, 53 `dot`: `.`, 54 `dquote`: `"`, 55 `ellip`: `…`, 56 `ellipsis`: `…`, 57 `eq`: `=`, 58 `equal`: `=`, 59 `equals`: `=`, 60 `euro`: `€`, 61 `excl`: `!`, 62 `happy`: `😀`, 63 `hash`: `#`, 64 `heart`: `❤️`, 65 `hellip`: `…`, 66 `hole`: `○`, 67 `lightning`: `🌩️`, 68 `mdot`: `·`, 69 `minus`: `-`, 70 `moon`: `🌕`, 71 `oquote`: `“`, 72 `space`: ` `, 73 `percent`: `%`, 74 `pipe`: `|`, 75 `plus`: `+`, 76 `rain`: `🌧️`, 77 `semi`: `;`, 78 `semicolon`: `;`, 79 `sharp`: `#`, 80 `shit`: `💩`, 81 `slash`: `/`, 82 `slasher`: `⧸`, 83 `smile`: `🙂`, 84 `snow`: `❄️`, 85 `square`: `■`, 86 `squote`: `'`, 87 `star`: `⭐`, 88 `sun`: `☀️`, 89 `tilde`: `~`, 90 `underscore`: `_`, 91 `vbar`: `|`, 92 93 `digits`: `0123456789`, 94 `faces`: faces, 95 `greek`: `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω`, 96 `infinity`: `∞`, 97 `latin`: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`, 98 `letters`: `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`, 99 `lower`: `abcdefghijklmnopqrstuvwxyz`, 100 `loweralpha`: `α`, 101 `lowerbeta`: `β`, 102 `lowerdelta`: `δ`, 103 `lowerepsilon`: `ε`, 104 `lowergamma`: `γ`, 105 `lowerhex`: `0123456789abcdef`, 106 `lowerlambda`: `λ`, 107 `lowerlatin`: `abcdefghijklmnopqrstuvwxyz`, 108 `loweromega`: `ω`, 109 `lowerpi`: `π`, 110 `lowersigma`: `σ`, 111 `lowertau`: `τ`, 112 `lowertheta`: `θ`, 113 `lowergreek`: `αβγδεζηθικλμνξοπρστυφχψω`, 114 `math`: `+-×÷²³±`, 115 `other`: `✓✗✔❌`, 116 `plusminus`: `±`, 117 `product`: `Π`, 118 `punctuation`: `!"#$%&'()*+,-./:;<=>?@[\]^_` + "`" + `{|}~`, 119 `sum`: `Σ`, 120 `upper`: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 121 `uppergreek`: `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ`, 122 `upperhex`: `0123456789ABCDEF`, 123 `upperlatin`: `ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 124 125 `eu`: `🇪🇺`, 126 `eur`: `🇪🇺`, 127 128 `af`: `🇦🇫`, 129 `ax`: `🇦🇽`, 130 `al`: `🇦🇱`, 131 `dz`: `🇩🇿`, 132 `as`: `🇦🇸`, 133 `ad`: `🇦🇩`, 134 `ao`: `🇦🇴`, 135 `ai`: `🇦🇮`, 136 `aq`: `🇦🇶`, 137 `ag`: `🇦🇬`, 138 `ar`: `🇦🇷`, 139 `am`: `🇦🇲`, 140 `aw`: `🇦🇼`, 141 `au`: `🇦🇺`, 142 `at`: `🇦🇹`, 143 `az`: `🇦🇿`, 144 `bs`: `🇧🇸`, 145 `bh`: `🇧🇭`, 146 `bd`: `🇧🇩`, 147 `bb`: `🇧🇧`, 148 `by`: `🇧🇾`, 149 `be`: `🇧🇪`, 150 `bz`: `🇧🇿`, 151 `bj`: `🇧🇯`, 152 `bm`: `🇧🇲`, 153 `bt`: `🇧🇹`, 154 `bo`: `🇧🇴`, 155 `bq`: `🇧🇶`, 156 `ba`: `🇧🇦`, 157 `bw`: `🇧🇼`, 158 `bv`: `🇧🇻`, 159 `br`: `🇧🇷`, 160 `io`: `🇮🇴`, 161 `bn`: `🇧🇳`, 162 `bg`: `🇧🇬`, 163 `bf`: `🇧🇫`, 164 `bi`: `🇧🇮`, 165 `cv`: `🇨🇻`, 166 `kh`: `🇰🇭`, 167 `cm`: `🇨🇲`, 168 `ca`: `🇨🇦`, 169 `ky`: `🇰🇾`, 170 `cf`: `🇨🇫`, 171 `td`: `🇹🇩`, 172 `cl`: `🇨🇱`, 173 `cn`: `🇨🇳`, 174 `cx`: `🇨🇽`, 175 `cc`: `🇨🇨`, 176 `co`: `🇨🇴`, 177 `km`: `🇰🇲`, 178 `cd`: `🇨🇩`, 179 `cg`: `🇨🇬`, 180 `ck`: `🇨🇰`, 181 `cr`: `🇨🇷`, 182 `ci`: `🇨🇮`, 183 `hr`: `🇭🇷`, 184 `cu`: `🇨🇺`, 185 `cw`: `🇨🇼`, 186 `cy`: `🇨🇾`, 187 `cz`: `🇨🇿`, 188 `dk`: `🇩🇰`, 189 `dj`: `🇩🇯`, 190 `dm`: `🇩🇲`, 191 `do`: `🇩🇴`, 192 `ec`: `🇪🇨`, 193 `eg`: `🇪🇬`, 194 `sv`: `🇸🇻`, 195 `gq`: `🇬🇶`, 196 `er`: `🇪🇷`, 197 `ee`: `🇪🇪`, 198 `sz`: `🇸🇿`, 199 `et`: `🇪🇹`, 200 `fk`: `🇫🇰`, 201 `fo`: `🇫🇴`, 202 `fj`: `🇫🇯`, 203 `fi`: `🇫🇮`, 204 `fr`: `🇫🇷`, 205 `gf`: `🇬🇫`, 206 `pf`: `🇵🇫`, 207 `tf`: `🇹🇫`, 208 `ga`: `🇬🇦`, 209 `gm`: `🇬🇲`, 210 `ge`: `🇬🇪`, 211 `de`: `🇩🇪`, 212 `gh`: `🇬🇭`, 213 `gi`: `🇬🇮`, 214 `gr`: `🇬🇷`, 215 `gl`: `🇬🇱`, 216 `gd`: `🇬🇩`, 217 `gp`: `🇬🇵`, 218 `gu`: `🇬🇺`, 219 `gt`: `🇬🇹`, 220 `gg`: `🇬🇬`, 221 `gn`: `🇬🇳`, 222 `gw`: `🇬🇼`, 223 `gy`: `🇬🇾`, 224 `ht`: `🇭🇹`, 225 `hm`: `🇭🇲`, 226 `va`: `🇻🇦`, 227 `hn`: `🇭🇳`, 228 `hk`: `🇭🇰`, 229 `hu`: `🇭🇺`, 230 `is`: `🇮🇸`, 231 `in`: `🇮🇳`, 232 `id`: `🇮🇩`, 233 `ir`: `🇮🇷`, 234 `iq`: `🇮🇶`, 235 `ie`: `🇮🇪`, 236 `im`: `🇮🇲`, 237 `il`: `🇮🇱`, 238 `it`: `🇮🇹`, 239 `jm`: `🇯🇲`, 240 `jp`: `🇯🇵`, 241 `je`: `🇯🇪`, 242 `jo`: `🇯🇴`, 243 `kz`: `🇰🇿`, 244 `ke`: `🇰🇪`, 245 `ki`: `🇰🇮`, 246 `kp`: `🇰🇵`, 247 `kr`: `🇰🇷`, 248 `kw`: `🇰🇼`, 249 `kg`: `🇰🇬`, 250 `la`: `🇱🇦`, 251 `lv`: `🇱🇻`, 252 `lb`: `🇱🇧`, 253 `ls`: `🇱🇸`, 254 `lr`: `🇱🇷`, 255 `ly`: `🇱🇾`, 256 `li`: `🇱🇮`, 257 `lt`: `🇱🇹`, 258 `lu`: `🇱🇺`, 259 `mo`: `🇲🇴`, 260 `mk`: `🇲🇰`, 261 `mg`: `🇲🇬`, 262 `mw`: `🇲🇼`, 263 `my`: `🇲🇾`, 264 `mv`: `🇲🇻`, 265 `ml`: `🇲🇱`, 266 `mt`: `🇲🇹`, 267 `mh`: `🇲🇭`, 268 `mq`: `🇲🇶`, 269 `mr`: `🇲🇷`, 270 `mu`: `🇲🇺`, 271 `yt`: `🇾🇹`, 272 `mx`: `🇲🇽`, 273 `fm`: `🇫🇲`, 274 `md`: `🇲🇩`, 275 `mc`: `🇲🇨`, 276 `mn`: `🇲🇳`, 277 `me`: `🇲🇪`, 278 `ms`: `🇲🇸`, 279 `ma`: `🇲🇦`, 280 `mz`: `🇲🇿`, 281 `mm`: `🇲🇲`, 282 `na`: `🇳🇦`, 283 `nr`: `🇳🇷`, 284 `np`: `🇳🇵`, 285 `nl`: `🇳🇱`, 286 `nc`: `🇳🇨`, 287 `nz`: `🇳🇿`, 288 `ni`: `🇳🇮`, 289 `ne`: `🇳🇪`, 290 `ng`: `🇳🇬`, 291 `nu`: `🇳🇺`, 292 `nf`: `🇳🇫`, 293 `mp`: `🇲🇵`, 294 `no`: `🇳🇴`, 295 `om`: `🇴🇲`, 296 `pk`: `🇵🇰`, 297 `pw`: `🇵🇼`, 298 `ps`: `🇵🇸`, 299 `pa`: `🇵🇦`, 300 `pg`: `🇵🇬`, 301 `py`: `🇵🇾`, 302 `pe`: `🇵🇪`, 303 `ph`: `🇵🇭`, 304 `pn`: `🇵🇳`, 305 `pl`: `🇵🇱`, 306 `pt`: `🇵🇹`, 307 `pr`: `🇵🇷`, 308 `qa`: `🇶🇦`, 309 `re`: `🇷🇪`, 310 `ro`: `🇷🇴`, 311 `ru`: `🇷🇺`, 312 `rw`: `🇷🇼`, 313 `bl`: `🇧🇱`, 314 `sh`: `🇸🇭`, 315 `kn`: `🇰🇳`, 316 `lc`: `🇱🇨`, 317 `mf`: `🇲🇫`, 318 `pm`: `🇵🇲`, 319 `vc`: `🇻🇨`, 320 `ws`: `🇼🇸`, 321 `sm`: `🇸🇲`, 322 `st`: `🇸🇹`, 323 `sa`: `🇸🇦`, 324 `sn`: `🇸🇳`, 325 `rs`: `🇷🇸`, 326 `sc`: `🇸🇨`, 327 `sl`: `🇸🇱`, 328 `sg`: `🇸🇬`, 329 `sx`: `🇸🇽`, 330 `sk`: `🇸🇰`, 331 `si`: `🇸🇮`, 332 `sb`: `🇸🇧`, 333 `so`: `🇸🇴`, 334 `za`: `🇿🇦`, 335 `gs`: `🇬🇸`, 336 `ss`: `🇸🇸`, 337 `es`: `🇪🇸`, 338 `lk`: `🇱🇰`, 339 `sd`: `🇸🇩`, 340 `sr`: `🇸🇷`, 341 `sj`: `🇸🇯`, 342 `se`: `🇸🇪`, 343 `ch`: `🇨🇭`, 344 `sy`: `🇸🇾`, 345 `tw`: `🇹🇼`, 346 `tj`: `🇹🇯`, 347 `tz`: `🇹🇿`, 348 `th`: `🇹🇭`, 349 `tl`: `🇹🇱`, 350 `tg`: `🇹🇬`, 351 `tk`: `🇹🇰`, 352 `to`: `🇹🇴`, 353 `tt`: `🇹🇹`, 354 `tn`: `🇹🇳`, 355 `tr`: `🇹🇷`, 356 `tm`: `🇹🇲`, 357 `tc`: `🇹🇨`, 358 `tv`: `🇹🇻`, 359 `ug`: `🇺🇬`, 360 `ua`: `🇺🇦`, 361 `ae`: `🇦🇪`, 362 `gb`: `🇬🇧`, 363 `um`: `🇺🇲`, 364 `us`: `🇺🇸`, 365 `uy`: `🇺🇾`, 366 `uz`: `🇺🇿`, 367 `vu`: `🇻🇺`, 368 `ve`: `🇻🇪`, 369 `vn`: `🇻🇳`, 370 `vg`: `🇻🇬`, 371 `vi`: `🇻🇮`, 372 `wf`: `🇼🇫`, 373 `eh`: `🇪🇭`, 374 `ye`: `🇾🇪`, 375 `zm`: `🇿🇲`, 376 `zw`: `🇿🇼`, 377 378 `afg`: `🇦🇫`, 379 `ala`: `🇦🇽`, 380 `alb`: `🇦🇱`, 381 `dza`: `🇩🇿`, 382 `asm`: `🇦🇸`, 383 `and`: `🇦🇩`, 384 `ago`: `🇦🇴`, 385 `aia`: `🇦🇮`, 386 `ata`: `🇦🇶`, 387 `atg`: `🇦🇬`, 388 `arg`: `🇦🇷`, 389 `arm`: `🇦🇲`, 390 `abw`: `🇦🇼`, 391 `aus`: `🇦🇺`, 392 `aut`: `🇦🇹`, 393 `aze`: `🇦🇿`, 394 `bhs`: `🇧🇸`, 395 `bhr`: `🇧🇭`, 396 `bgd`: `🇧🇩`, 397 `brb`: `🇧🇧`, 398 `blr`: `🇧🇾`, 399 `bel`: `🇧🇪`, 400 `blz`: `🇧🇿`, 401 `ben`: `🇧🇯`, 402 `bmu`: `🇧🇲`, 403 `btn`: `🇧🇹`, 404 `bol`: `🇧🇴`, 405 `bes`: `🇧🇶`, 406 `bih`: `🇧🇦`, 407 `bwa`: `🇧🇼`, 408 `bvt`: `🇧🇻`, 409 `bra`: `🇧🇷`, 410 `iot`: `🇮🇴`, 411 `brn`: `🇧🇳`, 412 `bgr`: `🇧🇬`, 413 `bfa`: `🇧🇫`, 414 `bdi`: `🇧🇮`, 415 `cpv`: `🇨🇻`, 416 `khm`: `🇰🇭`, 417 `cmr`: `🇨🇲`, 418 `can`: `🇨🇦`, 419 `cym`: `🇰🇾`, 420 `caf`: `🇨🇫`, 421 `tcd`: `🇹🇩`, 422 `chl`: `🇨🇱`, 423 `chn`: `🇨🇳`, 424 `cxr`: `🇨🇽`, 425 `cck`: `🇨🇨`, 426 `col`: `🇨🇴`, 427 `com`: `🇰🇲`, 428 `cod`: `🇨🇩`, 429 `cog`: `🇨🇬`, 430 `cok`: `🇨🇰`, 431 `cri`: `🇨🇷`, 432 `civ`: `🇨🇮`, 433 `hrv`: `🇭🇷`, 434 `cub`: `🇨🇺`, 435 `cuw`: `🇨🇼`, 436 `cyp`: `🇨🇾`, 437 `cze`: `🇨🇿`, 438 `dnk`: `🇩🇰`, 439 `dji`: `🇩🇯`, 440 `dma`: `🇩🇲`, 441 `dom`: `🇩🇴`, 442 `ecu`: `🇪🇨`, 443 `egy`: `🇪🇬`, 444 `slv`: `🇸🇻`, 445 `gnq`: `🇬🇶`, 446 `eri`: `🇪🇷`, 447 `est`: `🇪🇪`, 448 `swz`: `🇸🇿`, 449 `eth`: `🇪🇹`, 450 `flk`: `🇫🇰`, 451 `fro`: `🇫🇴`, 452 `fji`: `🇫🇯`, 453 `fin`: `🇫🇮`, 454 `fra`: `🇫🇷`, 455 `guf`: `🇬🇫`, 456 `pyf`: `🇵🇫`, 457 `atf`: `🇹🇫`, 458 `gab`: `🇬🇦`, 459 `gmb`: `🇬🇲`, 460 `geo`: `🇬🇪`, 461 `deu`: `🇩🇪`, 462 `gha`: `🇬🇭`, 463 `gib`: `🇬🇮`, 464 `grc`: `🇬🇷`, 465 `grl`: `🇬🇱`, 466 `grd`: `🇬🇩`, 467 `glp`: `🇬🇵`, 468 `gum`: `🇬🇺`, 469 `gtm`: `🇬🇹`, 470 `ggy`: `🇬🇬`, 471 `gin`: `🇬🇳`, 472 `gnb`: `🇬🇼`, 473 `guy`: `🇬🇾`, 474 `hti`: `🇭🇹`, 475 `hmd`: `🇭🇲`, 476 `vat`: `🇻🇦`, 477 `hnd`: `🇭🇳`, 478 `hkg`: `🇭🇰`, 479 `hun`: `🇭🇺`, 480 `isl`: `🇮🇸`, 481 `ind`: `🇮🇳`, 482 `idn`: `🇮🇩`, 483 `irn`: `🇮🇷`, 484 `irq`: `🇮🇶`, 485 `irl`: `🇮🇪`, 486 `imn`: `🇮🇲`, 487 `isr`: `🇮🇱`, 488 `ita`: `🇮🇹`, 489 `jam`: `🇯🇲`, 490 `jpn`: `🇯🇵`, 491 `jey`: `🇯🇪`, 492 `jor`: `🇯🇴`, 493 `kaz`: `🇰🇿`, 494 `ken`: `🇰🇪`, 495 `kir`: `🇰🇮`, 496 `prk`: `🇰🇵`, 497 `kor`: `🇰🇷`, 498 `kwt`: `🇰🇼`, 499 `kgz`: `🇰🇬`, 500 `lao`: `🇱🇦`, 501 `lva`: `🇱🇻`, 502 `lbn`: `🇱🇧`, 503 `lso`: `🇱🇸`, 504 `lbr`: `🇱🇷`, 505 `lby`: `🇱🇾`, 506 `lie`: `🇱🇮`, 507 `ltu`: `🇱🇹`, 508 `lux`: `🇱🇺`, 509 `mac`: `🇲🇴`, 510 `mkd`: `🇲🇰`, 511 `mdg`: `🇲🇬`, 512 `mwi`: `🇲🇼`, 513 `mys`: `🇲🇾`, 514 `mdv`: `🇲🇻`, 515 `mli`: `🇲🇱`, 516 `mlt`: `🇲🇹`, 517 `mhl`: `🇲🇭`, 518 `mtq`: `🇲🇶`, 519 `mrt`: `🇲🇷`, 520 `mus`: `🇲🇺`, 521 `myt`: `🇾🇹`, 522 `mex`: `🇲🇽`, 523 `fsm`: `🇫🇲`, 524 `mda`: `🇲🇩`, 525 `mco`: `🇲🇨`, 526 `mng`: `🇲🇳`, 527 `mne`: `🇲🇪`, 528 `msr`: `🇲🇸`, 529 `mar`: `🇲🇦`, 530 `moz`: `🇲🇿`, 531 `mmr`: `🇲🇲`, 532 `nam`: `🇳🇦`, 533 `nru`: `🇳🇷`, 534 `npl`: `🇳🇵`, 535 `nld`: `🇳🇱`, 536 `ncl`: `🇳🇨`, 537 `nzl`: `🇳🇿`, 538 `nic`: `🇳🇮`, 539 `ner`: `🇳🇪`, 540 `nga`: `🇳🇬`, 541 `niu`: `🇳🇺`, 542 `nfk`: `🇳🇫`, 543 `mnp`: `🇲🇵`, 544 `nor`: `🇳🇴`, 545 `omn`: `🇴🇲`, 546 `pak`: `🇵🇰`, 547 `plw`: `🇵🇼`, 548 `pse`: `🇵🇸`, 549 `pan`: `🇵🇦`, 550 `png`: `🇵🇬`, 551 `pry`: `🇵🇾`, 552 `per`: `🇵🇪`, 553 `phl`: `🇵🇭`, 554 `pcn`: `🇵🇳`, 555 `pol`: `🇵🇱`, 556 `prt`: `🇵🇹`, 557 `pri`: `🇵🇷`, 558 `qat`: `🇶🇦`, 559 `reu`: `🇷🇪`, 560 `rou`: `🇷🇴`, 561 `rus`: `🇷🇺`, 562 `rwa`: `🇷🇼`, 563 `blm`: `🇧🇱`, 564 `shn`: `🇸🇭`, 565 `kna`: `🇰🇳`, 566 `lca`: `🇱🇨`, 567 `maf`: `🇲🇫`, 568 `spm`: `🇵🇲`, 569 `vct`: `🇻🇨`, 570 `wsm`: `🇼🇸`, 571 `smr`: `🇸🇲`, 572 `stp`: `🇸🇹`, 573 `sau`: `🇸🇦`, 574 `sen`: `🇸🇳`, 575 `srb`: `🇷🇸`, 576 `syc`: `🇸🇨`, 577 `sle`: `🇸🇱`, 578 `sgp`: `🇸🇬`, 579 `sxm`: `🇸🇽`, 580 `svk`: `🇸🇰`, 581 `svn`: `🇸🇮`, 582 `slb`: `🇸🇧`, 583 `som`: `🇸🇴`, 584 `zaf`: `🇿🇦`, 585 `sgs`: `🇬🇸`, 586 `ssd`: `🇸🇸`, 587 `esp`: `🇪🇸`, 588 `lka`: `🇱🇰`, 589 `sdn`: `🇸🇩`, 590 `sur`: `🇸🇷`, 591 `sjm`: `🇸🇯`, 592 `swe`: `🇸🇪`, 593 `che`: `🇨🇭`, 594 `syr`: `🇸🇾`, 595 `twn`: `🇹🇼`, 596 `tjk`: `🇹🇯`, 597 `tza`: `🇹🇿`, 598 `tha`: `🇹🇭`, 599 `tls`: `🇹🇱`, 600 `tgo`: `🇹🇬`, 601 `tkl`: `🇹🇰`, 602 `ton`: `🇹🇴`, 603 `tto`: `🇹🇹`, 604 `tun`: `🇹🇳`, 605 `tur`: `🇹🇷`, 606 `tkm`: `🇹🇲`, 607 `tca`: `🇹🇨`, 608 `tuv`: `🇹🇻`, 609 `uga`: `🇺🇬`, 610 `ukr`: `🇺🇦`, 611 `are`: `🇦🇪`, 612 `gbr`: `🇬🇧`, 613 `uae`: `🇦🇪`, 614 `umi`: `🇺🇲`, 615 `usa`: `🇺🇸`, 616 `ury`: `🇺🇾`, 617 `uzb`: `🇺🇿`, 618 `vut`: `🇻🇺`, 619 `ven`: `🇻🇪`, 620 `vnm`: `🇻🇳`, 621 `vgb`: `🇻🇬`, 622 `vir`: `🇻🇮`, 623 `wlf`: `🇼🇫`, 624 `esh`: `🇪🇭`, 625 `yem`: `🇾🇪`, 626 `zmb`: `🇿🇲`, 627 `zwe`: `🇿🇼`, 628 } 629 630 // symbolAliases resolves aliases for keys used in table name2symbols 631 var symbolAliases = map[string]string{ 632 // greek symbols 633 `alpha`: `loweralpha`, 634 `beta`: `lowerbeta`, 635 `delta`: `lowerdelta`, 636 `epsilon`: `lowerepsilon`, 637 `gamma`: `lowergamma`, 638 `lambda`: `lowerlambda`, 639 `omega`: `loweromega`, 640 `pi`: `lowerpi`, 641 `sigma`: `lowersigma`, 642 `tau`: `lowertau`, 643 `theta`: `lowertheta`, 644 645 // math symbols 646 `inf`: `infinity`, 647 `infty`: `infinity`, 648 `summation`: `sum`, 649 650 // punctuation symbols 651 `apos`: `apostrophe`, 652 `bquo`: `bquote`, 653 `dquo`: `dquote`, 654 `squo`: `squote`, 655 `punct`: `punctuation`, 656 657 // country flags 658 `afghanistan`: `af`, 659 `america`: `us`, 660 `algeria`: `dz`, 661 `angola`: `ao`, 662 `arabia`: `sa`, 663 `arabemirates`: `ae`, 664 `argentina`: `ar`, 665 `australia`: `au`, 666 `austria`: `at`, 667 `bangladesh`: `bd`, 668 `belgium`: `be`, 669 `brazil`: `br`, 670 `britain`: `gb`, 671 `canada`: `ca`, 672 `chile`: `cl`, 673 `china`: `cn`, 674 `colombia`: `co`, 675 `czechia`: `cz`, 676 `czechrepublic`: `cz`, 677 `denmark`: `dk`, 678 `dominicanrepublic`: `do`, 679 `drc`: `cd`, 680 `drcongo`: `cd`, 681 `ecuador`: `ec`, 682 `egypt`: `eg`, 683 `emirates`: `ae`, 684 `england`: `gb`, 685 `ethiopia`: `et`, 686 `finland`: `fi`, 687 `france`: `fr`, 688 `germany`: `de`, 689 `ghana`: `gh`, 690 `greatbritain`: `gb`, 691 `greece`: `gr`, 692 `holland`: `nl`, 693 `hungary`: `hu`, 694 `india`: `in`, 695 `indonesia`: `id`, 696 `iran`: `ir`, 697 `iraq`: `iq`, 698 `ireland`: `ie`, 699 `israel`: `il`, 700 `italy`: `it`, 701 `japan`: `jp`, 702 `kazakhstan`: `kz`, 703 `kenya`: `ke`, 704 `korea`: `kr`, 705 `kuwait`: `kw`, 706 `madagascar`: `mg`, 707 `malaysia`: `my`, 708 `mexico`: `mx`, 709 `morocco`: `ma`, 710 `mozambique`: `mz`, 711 `myanmar`: `mm`, 712 `nepal`: `np`, 713 `netherlands`: `nl`, 714 `newzealand`: `nz`, 715 `nigeria`: `ng`, 716 `northkorea`: `kp`, 717 `norway`: `no`, 718 `pakistan`: `pk`, 719 `peru`: `pe`, 720 `philippines`: `ph`, 721 `poland`: `pl`, 722 `portugal`: `pt`, 723 `qatar`: `qa`, 724 `rok`: `kr`, 725 `romania`: `ro`, 726 `russia`: `ru`, 727 `saudiarabia`: `sa`, 728 `singapore`: `sg`, 729 `somalia`: `so`, 730 `southafrica`: `za`, 731 `southkorea`: `kr`, 732 `spain`: `es`, 733 `srilanka`: `lk`, 734 `sudan`: `sd`, 735 `sweden`: `se`, 736 `switzerland`: `ch`, 737 `taiwan`: `tw`, 738 `tanzania`: `tz`, 739 `thailand`: `th`, 740 `turkey`: `tr`, 741 `uganda`: `ug`, 742 `ukraine`: `ua`, 743 `unitedarabemirates`: `ae`, 744 `unitedemirates`: `ae`, 745 `unitedkingdom`: `gb`, 746 `unitedstates`: `us`, 747 `uzbekistan`: `uz`, 748 `yemen`: `ye`, 749 `venezuela`: `ve`, 750 `vietnam`: `vn`, 751 } 752 753 func showSymbols(w *bufio.Writer, r io.Reader, names []string) error { 754 if len(names) == 0 { 755 return showAllSymbols(w) 756 } 757 758 for _, name := range names { 759 // normalize symbol names 760 name = strings.TrimSpace(name) 761 name = strings.ToLower(name) 762 name = strings.ReplaceAll(name, ` `, ``) 763 name = strings.ReplaceAll(name, `-`, ``) 764 name = strings.ReplaceAll(name, `_`, ``) 765 766 if len(name) == 0 { 767 return errors.New(`no symbol name given`) 768 } 769 770 // handle regular symbol-name lookups 771 s := name 772 if alias, ok := symbolAliases[name]; ok { 773 s = alias 774 } 775 sym, ok := names2symbols[s] 776 if !ok { 777 return errors.New(`no symbol named ` + name + ` found`) 778 } 779 780 w.WriteString(sym) 781 if err := endLine(w); err != nil { 782 return err 783 } 784 } 785 786 return nil 787 } 788 789 func showAllSymbols(w *bufio.Writer) error { 790 var keys []string 791 got := make(map[string]struct{}, len(names2symbols)+len(symbolAliases)) 792 793 for k := range names2symbols { 794 got[k] = struct{}{} 795 keys = append(keys, k) 796 } 797 798 for k := range symbolAliases { 799 if _, ok := got[k]; ok { 800 continue 801 } 802 got[k] = struct{}{} 803 keys = append(keys, k) 804 } 805 806 sort.Strings(keys) 807 808 for _, s := range keys { 809 k := s 810 if alias, ok := symbolAliases[k]; ok { 811 k = alias 812 } 813 814 w.WriteString(s) 815 w.WriteByte('\t') 816 w.WriteString(names2symbols[k]) 817 if err := endLine(w); err != nil { 818 return err 819 } 820 } 821 822 return nil 823 } File: tu/tables.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 ) 7 8 func beginCSV(w *bufio.Writer, r io.Reader, args []string) error { 9 rw := newWriterCSV(w) 10 err := rw.Write(args) 11 rw.Flush() 12 13 if err := adaptWriteError(err); err != nil { 14 return err 15 } 16 17 return loopLines(r, func(line []byte) error { 18 w.Write(line) 19 return endLine(w) 20 }) 21 } 22 23 func beginTSV(w *bufio.Writer, r io.Reader, args []string) error { 24 for i, s := range args { 25 if i > 0 { 26 w.WriteByte('\t') 27 } 28 w.WriteString(s) 29 } 30 31 if err := endLine(w); err != nil { 32 return err 33 } 34 35 return loopLines(r, func(line []byte) error { 36 w.Write(line) 37 return endLine(w) 38 }) 39 } 40 41 func csv2lines(w *bufio.Writer, r io.Reader) error { 42 return loopCSV(r, func(row []string) error { 43 return writeLines(w, row) 44 }) 45 } 46 47 func csv2tsv(w *bufio.Writer, r io.Reader) error { 48 return loopCSV(r, func(row []string) error { 49 for i, v := range row { 50 if i > 0 { 51 w.WriteByte('\t') 52 } 53 w.WriteString(v) 54 } 55 return endLine(w) 56 }) 57 } 58 59 func endCSV(w *bufio.Writer, r io.Reader, args []string) error { 60 err := loopLines(r, func(line []byte) error { 61 w.Write(line) 62 return endLine(w) 63 }) 64 65 if err != nil { 66 return err 67 } 68 69 rw := newWriterCSV(w) 70 defer rw.Flush() 71 return adaptWriteError(rw.Write(args)) 72 } 73 74 func endTSV(w *bufio.Writer, r io.Reader, args []string) error { 75 err := loopLines(r, func(line []byte) error { 76 w.Write(line) 77 return endLine(w) 78 }) 79 80 if err != nil { 81 return err 82 } 83 84 for i, s := range args { 85 if i > 0 { 86 w.WriteByte('\t') 87 } 88 w.WriteString(s) 89 } 90 return endLine(w) 91 } 92 93 func ssv2lines(w *bufio.Writer, r io.Reader) error { 94 return loopLines(r, func(line []byte) error { 95 var err error 96 loopFields(line, func(i int, s []byte) (keepGoing bool) { 97 w.Write(s) 98 err = endLine(w) 99 return err == nil 100 }) 101 return err 102 }) 103 } 104 105 func ssv2tsv(w *bufio.Writer, r io.Reader) error { 106 return loopLines(r, func(line []byte) error { 107 // avoid empty output lines; keeps the original string, in case 108 // it's later being treated as TSV, padding and all 109 if len(trimSpaces(line)) == 0 { 110 return nil 111 } 112 113 loopFields(line, func(i int, s []byte) (keepGoing bool) { 114 if i > 0 { 115 w.WriteByte('\t') 116 } 117 w.Write(s) 118 return true 119 }) 120 121 return endLine(w) 122 }) 123 } 124 125 func tsv2lines(w *bufio.Writer, r io.Reader) error { 126 return loopLines(r, func(line []byte) error { 127 var err error 128 loopTSV(line, func(i int, s []byte) (keepGoing bool) { 129 w.Write(s) 130 err = endLine(w) 131 return err == nil 132 }) 133 return err 134 }) 135 } File: tu/tools.txt 1 after [what] 2 ignore starts of lines until the string/marker given; no matches in 3 a line results in an empty line 4 5 after-last [what] 6 ignore starts of lines until the last appearance of the string/marker 7 given; no matches in a line results in an empty line 8 9 aliases 10 show all tool names and their aliases as lines of tab-separated items 11 12 base-64 13 turn input bytes into their base-64 counterparts 14 15 before [what] 16 ignore parts of lines starting from the string/marker given; lines 17 with no matches are kept in full 18 19 before-last [what] 20 ignore parts of lines starting from the last appearance of the 21 string/marker given; lines with no matches are kept in full 22 23 begin [lines...] 24 start output with the lines given, followed by all input lines 25 26 begin-csv [column names...] 27 start output with a CSV (comma-separated values) line, followed by 28 all lines from the input; precede input with a CSV line at the start 29 30 begin-tsv [column names...] 31 start output with a TSV (tab-separated values) line, followed by 32 all lines from the input; precede input with a TSV line at the start 33 34 big-files [min] [folders...] 35 find all files recursively from the folders given, which have at 36 least the number of bytes given 37 38 blow [tab-stop...] 39 expand tabs using up to the number of spaces given (the `tab-stop`); 40 default tab-stop is 4, when not given explicitly; when the tab-stop 41 is less than 1, behaves like tool `drop-tabs` 42 43 book [height] 44 layout input lines side-by-side on 2 columns, like in a book; a 45 useful shell function around this tool is `like a book` (lab): 46 lab() { tu book "$(($(tput lines) - 1))" "$@" | less -KiCRS; } 47 48 breathe [every...] 49 put an extra empty line every few lines (by default every 5); periods 50 less than 1 disable adding extra empty lines 51 52 bytes [filepaths...] 53 concatenate/emit all bytes from all named inputs (files) given: this is 54 one of the few tools which can open files; single dashes mean fully-read 55 standard input; multiple single dashes are allowed, stdin being read only 56 once 57 58 chop-lf 59 ignore the last byte, if it's a line-feed 60 61 common [filepath] [filepath] 62 find all lines 2 inputs have in common (their set-intersection); this 63 is one of the few tools which can open files 64 65 compose [tools...] 66 compose multiple tools pipe-like, allowing their executions to overlap, 67 taking advantage of multiple cores, when available; the double-dash `--` 68 separates each tool/arguments combo used for the internal pipe; the same 69 double-dash is also available as an alias for this tool, for convenience 70 71 count-bytes [files/folders...] 72 count all bytes, including tallies of several bytes of common interest, 73 such as spaces, tabs, nulls, and so on; the result is TSV (tab-separated 74 values) lines, starting with a header line 75 76 crlf 77 make all lines end with a CRLF byte-pair 78 79 csv 80 turn CSV (comma-separated values) lines into single-item lines 81 82 csv2tsv 83 turn CSV (comma-separated values) lines into TSV (tab-separated) ones 84 85 data-uri [files...] 86 encode each response from each path/URI given as a data-URI line, 87 which is like base-64, but starts with a MIME-type declaration 88 89 debase-64 90 decode base64-encoded text into its corresponding bytes 91 92 dedup 93 ensure any input-line appears in the output only once 94 95 drop [strings...] 96 remove all strings given from each line, in the order given 97 98 drop-end [strings...] 99 remove all endings given from each line, if present at the end when 100 checked, and in the order given 101 102 drop-start [strings...] 103 remove all starts given from each line, if present at the start when 104 checked, and in the order given 105 106 drop-tabs 107 remove/ignore all tabs from each line; tool `blow` acts the same way, 108 when given a 0 or negative `tab-stop` values 109 110 each [tool] [arguments...] 111 run the tool for each input line, using the arguments given, followed 112 by the current line as an extra/final argument 113 114 empty-files [folders...] 115 find all empty files recursively from the folders given 116 117 empty-folders [folders...] 118 find all empty folders recursively from the folders given 119 120 end [lines...] 121 output all input lines, followed by the lines given 122 123 end-csv [column names...] 124 output all input lines, followed by a CSV (comma-separated values) 125 line 126 127 end-tsv [column names...] 128 output all input lines, followed by a TSV (tab-separated values) line 129 130 examples 131 show examples explaining how to use this app's tools on the cmd-line 132 133 files [folders...] 134 find all files recursively from the folders given 135 136 first [max lines...] 137 limit output to the number of lines given, if input exceeds it; the 138 default is 1 line, when no number is given 139 140 folders [folders...] 141 find all folders recursively from the folders given 142 143 glue [separator...] 144 put all input lines into a single line, putting the separator given 145 between items; when not given, the default is to use no separator 146 147 gzip 148 gzip-encode/compress input bytes 149 150 help 151 show this app's help message 152 153 hex 154 encode input bytes into ASCII-hexadecimal text 155 156 id3-pic 157 isolate the thumbnail bytes, if available, from the ID3 part of a media 158 file/stream, usually with MP3-encoded sound in it; this tool supports 159 both PIC and APIC data-section types; also available as `mp3-pic` 160 161 join [separator...] 162 join input lines into a single line, putting the separator between 163 adjacent items; the separator defaults to a tab, when not given 164 165 json-0 166 convert/fix JSON/pseudo-JSON input into minimal JSON output; valid JSON 167 input is simply minimized; output is always a single line, which always 168 ends with a line-feed 169 170 jsonl 171 convert lines, each with JSON/pseudo-JSON data, into a valid (minimal) 172 JSON array 173 174 junk [byte count...] 175 emit pseudo-random bytes; if no byte-count is given, the default is to 176 emit 1024 pseudo-random bytes 177 178 last [max lines...] 179 limit output to the last few lines, if input has more lines than the 180 number given; the default is 1 line, when no number is given 181 182 lines [filepaths...] 183 ensure all lines end with a single LF byte, including the last one, 184 ignoring CRLF byte-pairs from the original input; this is one of the 185 few tools which can open files; single dashes mean fully-read standard 186 input; multiple single dashes are allowed, stdin being read only once 187 188 line-up [column count...] 189 line-up input lines into TSV (tab-separated values) lines with up to 190 the number of items given; when the item-count is less than 1, line-up 191 all input lines into a single TSV line 192 193 links 194 emit all hyperlinks found in the input, one per output line 195 196 lower 197 lower-case all symbols 198 199 match-para [regexp...] 200 `match paragraphs` keeps only the paragraphs matching any of the 201 regexes given on any of their lines; all regex matches are done 202 case-insensitively, meaning letter-case differences are ignored 203 204 md-5 205 turn input bytes into their MD-5 hash, as a hexadecimal-ASCII line 206 207 missing [filepath] [filepath] 208 find all lines which are in the 2nd input, but not in the 1st input; 209 this is one of the few tools which can open files 210 211 mumble [tool] [arguments...] 212 run the tool with the arguments given, after starting a line with 213 space-separated tool/argument list, followed by a tab 214 215 n [start...] 216 number lines starting from the number given, or 1 by default; line 217 numbers are separated from the rest of their output line by a tab 218 219 no-pun 220 drop/ignore all common punctuation symbols 221 222 now 223 show the current date and time using the YYYY-MM-DD HH:MM:SS format 224 225 null 226 read nothing, write nothing 227 228 numbers 229 emit only numbers detected from the input, one item per output line 230 231 open [names...] 232 open files/folders/URIs/ports using the system's designated app for 233 each; port numbers open in the main web-browser pointing to localhost 234 235 plain 236 ignore all ANSI-style sequences, leaving all other bytes as given 237 238 primes [count...] 239 show the first n prime numbers, one per line; when not given a count, 240 the default is to show the first 1 million primes 241 242 reflow [max runes] 243 wrap/fold lines, trying to keep to the rune-count given, the exception 244 being single `words` which are longer than that on their own, and thus 245 can't be wrapped `losslessly` 246 247 runes 248 emit each UTF-8 code on its own line, except carriage-returns and 249 line-feeds, which are ignored; empty input-lines are ignored, so no 250 empty output-lines should result; all multi-code symbols, such as 251 country-flags, will get split across lines, resulting in non-sense 252 253 sbs [column count...] 254 `Side By Side` tries to lay input lines into columns; if not given 255 a column-count it tries to guess how many can fit an 80-symbol width; 256 the same automatic behavior happens when given a non-positive count 257 258 sha-1 259 turn input bytes into their SHA-1 hash, as a hexadecimal-ASCII line 260 261 sha-256 262 turn input bytes into their SHA-256 hash, as a hexadecimal-ASCII line 263 264 sha-512 265 turn input bytes into their SHA-512 hash, as a hexadecimal-ASCII line 266 267 show-it [files/folders/URIs/ports...] 268 when given names, works mostly the same as the `open` tool; when not 269 given any name, it auto-detects data from the main input, and pops a 270 new web-browser tab with the content ready to view and/or play 271 272 since [what] 273 ignore starts of lines before the string/marker given, so the marker 274 itself ends in the result; no matches in a line results in an empty 275 line 276 277 since-last [what] 278 ignore starts of lines before the last appearance of the string/marker 279 given, so the marker itself ends in the result; no matches in a line 280 results in an empty line 281 282 skip [line count] 283 ignore the first n lines, possibly ignoring all input, when it has 284 fewer lines than that 285 286 skip-empty 287 skip/ignore empty lines, keeping all others 288 289 skip-last [line count...] 290 ignore the last n lines, possibly ignoring all input, when it has 291 fewer lines than that; the default is 1, when no number is given 292 293 small-files [max] [folders...] 294 find all files recursively from the folders given, which have fewer 295 than the number of bytes given 296 297 squeeze 298 aggressively ignore unneeded spaces, ignoring leading and/or trailing 299 spaces in lines, also `squeezing` runs of multiple spaces into single 300 spaces; spaces around tabs are ignored as well 301 302 ssv 303 turn SSV (space-separated values) lines into single-item lines 304 305 ssv2tsv 306 turn SSV (space-separated values) lines into TSV (tab-separated) ones 307 308 stomp 309 turn runs of consecutive empty lines into single empty lines, thus 310 tightening paragraphs; also ignore leading and trailing empty lines, 311 except for a final line-feed output for any non-empty input; to also 312 trim lines in paragraphs, use tool `trim-par` instead 313 314 strings 315 find all ASCII strings among the input bytes, showing one per line 316 317 symbols [symbol name...] 318 lookup commonly-used symbols by their name(s)/aliases; show all symbols 319 available, with all their aliases, when not given any lookup-names 320 321 tally 322 reverse-sort unique input-lines by their tally counts; each output line 323 is the tally-count, a tab, and the tallied line; output has no header 324 line with the column names, as usual for TSV (tab-separated values) 325 tables; command `begin-tsv` can prepend such a starting line 326 327 today 328 show the current date 329 330 top-files [folders...] 331 find all top-level files from the folders given 332 333 top-folders [folders...] 334 find all top-level folders from the folders given 335 336 trim 337 ignore leading and/or trailing spaces from lines 338 339 trim-trail 340 ignore trailing spaces from lines 341 342 truncate [max runes] 343 limit lines up to the number of runes given; negative values result 344 in empty lines; some symbols which take more than 1 `rune`, such as 345 country flags, may get truncated into non-sense, and always count as 346 multiple runes 347 348 tsv 349 turn TSV (tab-separated values) lines into single-item lines 350 351 un-bzip 352 bzip2-decode/uncompress input bytes 353 354 un-gzip 355 gzip-decode/uncompress input bytes 356 357 un-hex 358 decode input bytes from ASCII-hexadecimal text 359 360 until [what] 361 ignore parts of lines beyond the string/marker given; no matches in 362 a line keeps the whole line as is 363 364 until-last [what] 365 ignore parts of lines beyond the last appearance of the string/marker 366 given; no matches in a line keeps the whole line as is 367 368 utf-8 369 turn both kinds of UTF-16 data into UTF-8, keeping UTF-8 input as 370 given, except ignoring a leading UTF-8 BOM, if present 371 372 uri-encode 373 URI-encode each input line, percent-escaping symbols when needed 374 375 vulgarize 376 turn Latin-1-encoded bytes into UTF-8 runes; the name is taken from 377 the word `vulgarization`, which refers to the historical evolution 378 from latin into various modern languages over the centuries 379 380 with [file] [tool] [arguments...] 381 run the tool with the arguments given, using data from the named-input 382 given before the tool-name 383 384 ymd 385 show the current date using the YYYY-MM-DD format File: tu/utf8.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 "unicode/utf16" 7 ) 8 9 // toUTF8 turns UTF-16 bytes (both kinds) and BOMed UTF-8 bytes into 10 // proper UTF-8 bytes: this is one of the few text-related tools which 11 // keeps CRLF sequences verbatim 12 func toUTF8(w *bufio.Writer, r io.Reader) error { 13 br := bufio.NewReader(r) 14 15 a, err := br.ReadByte() 16 if err == io.EOF { 17 return nil 18 } 19 if err != nil { 20 return err 21 } 22 23 b, err := br.ReadByte() 24 if err == io.EOF { 25 w.WriteByte(a) 26 return nil 27 } 28 if err != nil { 29 return err 30 } 31 32 // handle potential leading UTF-8 BOM 33 if a == 0xEF && b == 0xBB { 34 c, err := br.ReadByte() 35 if err == io.EOF { 36 w.WriteByte(a) 37 w.WriteByte(b) 38 return nil 39 } 40 41 if err != nil { 42 return err 43 } 44 45 if c != 0xBF { 46 w.WriteByte(a) 47 w.WriteByte(b) 48 w.WriteByte(c) 49 } 50 51 _, err = io.Copy(w, br) 52 return adaptWriteError(err) 53 } 54 55 // handle leading UTF-16 big-endian BOM 56 if a == 0xFE && b == 0xFF { 57 return deUTF16(w, br, readBytePairBE) 58 } 59 60 // handle leading UTF-16 little-endian BOM 61 if a == 0xFF && b == 0xFE { 62 return deUTF16(w, br, readBytePairLE) 63 } 64 65 // handle lack of leading UTF-16 BOM 66 sym := rune(256*int(b) + int(a)) 67 68 if utf16.IsSurrogate(sym) { 69 a, b, err := readBytePairLE(br) 70 if err == io.EOF { 71 return nil 72 } 73 if err != nil { 74 return err 75 } 76 77 next := rune(256*int(a) + int(b)) 78 sym = utf16.DecodeRune(sym, next) 79 } 80 81 w.WriteRune(sym) 82 return deUTF16(w, br, readBytePairLE) 83 } 84 85 // deUTF16 is used by func toUTF8 86 func deUTF16(w *bufio.Writer, br *bufio.Reader, readPair readPairFunc) error { 87 for { 88 a, b, err := readPair(br) 89 if err == io.EOF { 90 return nil 91 } 92 if err != nil { 93 return err 94 } 95 96 r := rune(256*int(a) + int(b)) 97 if utf16.IsSurrogate(r) { 98 a, b, err := readPair(br) 99 if err == io.EOF { 100 return nil 101 } 102 if err != nil { 103 return err 104 } 105 106 next := rune(256*int(a) + int(b)) 107 r = utf16.DecodeRune(r, next) 108 } 109 110 _, err = w.WriteRune(r) 111 err = adaptWriteError(err) 112 if err != nil { 113 return err 114 } 115 } 116 }