File: tu/bytes.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "compress/bzip2"
   7     "compress/gzip"
   8     "crypto/rand"
   9     "encoding/base64"
  10     "encoding/hex"
  11     "hash"
  12     "io"
  13     "regexp"
  14     "unicode/utf8"
  15 )
  16 
// linksExpr is the regular-expression source used to match http/https links:
// the scheme is followed by a run of host-like bytes, then by any number of
// slash-started path/query/fragment chunks
const linksExpr = `https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*`

// linksMatch is linksExpr compiled once, at package-initialization time; it's
// used by func loopLinks
var linksMatch = regexp.MustCompile(linksExpr)
  20 
  21 // isSymbolASCII helps the `strings` tool do its job quickly
  22 var isSymbolASCII = [256]bool{
  23     false, false, false, false, false, false, false, false,
  24     false, true, false, false, false, false, false, false,
  25     false, false, false, false, false, false, false, false,
  26     false, false, false, false, false, false, false, false,
  27     true, true, true, true, true, true, true, true,
  28     true, true, true, true, true, true, true, true,
  29     true, true, true, true, true, true, true, true,
  30     true, true, true, true, true, true, true, true,
  31     true, true, true, true, true, true, true, true,
  32     true, true, true, true, true, true, true, true,
  33     true, true, true, true, true, true, true, true,
  34     true, true, true, true, true, true, true, true,
  35     true, true, true, true, true, true, true, true,
  36     true, true, true, true, true, true, true, true,
  37     true, true, true, true, true, true, true, true,
  38     true, true, true, true, true, true, true, false,
  39 }
  40 
  41 // uriUnescapedASCII marks which ASCII bytes don't need escaping
  42 var uriUnescapedASCII = [256]bool{
  43     '0': true, '1': true, '2': true, '3': true, '4': true,
  44     '5': true, '6': true, '7': true, '8': true, '9': true,
  45 
  46     'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
  47     'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
  48     'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
  49     'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  50     'Y': true, 'Z': true,
  51 
  52     'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
  53     'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
  54     'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
  55     's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  56     'y': true, 'z': true,
  57 
  58     '-': true, '_': true, '.': true, '~': true,
  59     '/': true,
  60 }
  61 
  62 // latin1 is a direct byte-translation table from Latin-1 bytes into UTF-8
  63 // runes, used by func vulgarize
  64 var latin1 = [256]rune{
  65     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  66     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  67     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  68     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  69     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  70     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  71     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  72     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  73 
  74     0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  75     0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d,
  76     0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43,
  77     0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
  78     0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
  79     0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
  80     0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b,
  81     0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61,
  82     0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  83     0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
  84     0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73,
  85     0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
  86     0x7a, 0x7b, 0x7c, 0x7d, 0x7e, utf8.RuneError,
  87 
  88     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  89     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  90     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  91     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  92     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  93     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  94     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  95     utf8.RuneError, utf8.RuneError, utf8.RuneError, utf8.RuneError,
  96 
  97     ' ', '¡', '¢', '£', '¤', '¥', '¦', '§',
  98     '¨', '©', 'ª', '«', '¬', '­', '®', '¯',
  99     '°', '±', '²', '³', '´', 'µ', '¶', '·',
 100     '¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
 101     'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
 102     'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
 103     'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
 104     'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
 105     'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
 106     'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
 107     'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
 108     'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ',
 109 }
 110 
 111 func after(s, what []byte) []byte {
 112     if i := bytes.Index(s, what); i >= 0 {
 113         return s[i+len(what):]
 114     }
 115     return nil
 116 }
 117 
 118 func afterLast(s, what []byte) []byte {
 119     if i := bytes.LastIndex(s, what); i >= 0 {
 120         return s[i+len(what):]
 121     }
 122     return nil
 123 }
 124 
 125 func before(s, what []byte) []byte {
 126     if i := bytes.Index(s, what); i >= 0 {
 127         return s[:i]
 128     }
 129     return s
 130 }
 131 
 132 func beforeLast(s, what []byte) []byte {
 133     if i := bytes.LastIndex(s, what); i >= 0 {
 134         return s[:i]
 135     }
 136     return s
 137 }
 138 
 139 func bytesTool(w *bufio.Writer, r io.Reader, args []string) error {
 140     return handleNamedInputs(args, r, func(path string, r io.Reader) error {
 141         _, err := io.Copy(w, r)
 142         return adaptWriteError(err)
 143     })
 144 }
 145 
 146 // blowTabsLine expands tabs, using the tab-stop count given; non-positive
 147 // tab-stops cause it to ignore tabs altogether
 148 func blowTabsLine(w *bufio.Writer, line []byte, tabStop int) {
 149     if tabStop < 1 {
 150         dropTabsLine(w, line)
 151         return
 152     }
 153 
 154     n := 0
 155 
 156     for _, b := range line {
 157         if b == '\t' {
 158             writeSpaces(w, tabStop-n%tabStop)
 159             n = 0
 160             continue
 161         }
 162 
 163         w.WriteByte(b)
 164         n++
 165     }
 166 }
 167 
 168 // dropPunctuationLine ignores all common punctuation symbols
 169 func dropPunctuationLine(w *bufio.Writer, line []byte) {
 170     for len(line) > 0 {
 171         i := bytes.IndexAny(line, `,.<>;:'"/?[]{}|\!@#$%^&*()-+=~`+"`")
 172         if i < 0 {
 173             w.Write(line)
 174             break
 175         }
 176 
 177         w.Write(line[:i])
 178         line = line[i+1:]
 179     }
 180 }
 181 
 182 // dropTabsLine ignores all tabs
 183 func dropTabsLine(w *bufio.Writer, line []byte) {
 184     for len(line) > 0 {
 185         i := bytes.IndexByte(line, '\t')
 186         if i < 0 {
 187             w.Write(line)
 188             break
 189         }
 190 
 191         w.Write(line[:i])
 192         line = line[i+1:]
 193     }
 194 }
 195 
 196 // debase64 decodes base64-encoded text into its corresponding bytes
 197 func debase64(w *bufio.Writer, r io.Reader) error {
 198     r, err := trimStartDataURI(r)
 199     if err != nil {
 200         return err
 201     }
 202 
 203     dec := base64.NewDecoder(base64.StdEncoding, r)
 204     return copyBytes(w, dec)
 205 }
 206 
 207 // trimStartDataURI is used by func debase64
 208 func trimStartDataURI(r io.Reader) (io.Reader, error) {
 209     var buf [64]byte
 210     n, err := r.Read(buf[:])
 211     start := buf[:n]
 212 
 213     if bytes.HasPrefix(start, []byte{'d', 'a', 't', 'a', ':'}) {
 214         i := bytes.IndexByte(start, ',')
 215         if i < 0 {
 216             i = n - 1
 217         }
 218         start = start[i+1:]
 219     }
 220 
 221     if err == io.EOF {
 222         return bytes.NewReader(start), nil
 223     }
 224     if err != nil {
 225         return nil, err
 226     }
 227 
 228     return io.MultiReader(bytes.NewReader(start), r), nil
 229 }
 230 
 231 func debzip2(w *bufio.Writer, r io.Reader) error {
 232     dec := bzip2.NewReader(r)
 233     _, err := io.Copy(w, dec)
 234     return adaptWriteError(err)
 235 }
 236 
 237 // dedent ignores up to n leading spaces in the byte-slice given
 238 func dedent(s []byte, n int) []byte {
 239     for len(s) > 0 && n > 0 && s[0] == ' ' {
 240         s = s[1:]
 241         n--
 242     }
 243     return s
 244 }
 245 
 246 func degzip(w *bufio.Writer, r io.Reader) error {
 247     dec, err := gzip.NewReader(r)
 248     if err != nil {
 249         return err
 250     }
 251     _, err = io.Copy(w, dec)
 252     return adaptWriteError(err)
 253 }
 254 
 255 func gzipBytes(w *bufio.Writer, r io.Reader) error {
 256     enc := gzip.NewWriter(w)
 257     defer enc.Flush()
 258     _, err := io.Copy(enc, r)
 259     return adaptWriteError(err)
 260 }
 261 
 262 // hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix
 263 func hasPrefixByte(b []byte, prefix byte) bool {
 264     return len(b) > 0 && b[0] == prefix
 265 }
 266 
 267 // hasPrefixFold is a case-insensitive bytes.HasPrefix
 268 func hasPrefixFold(s []byte, prefix []byte) bool {
 269     n := len(prefix)
 270     return len(s) >= n && bytes.EqualFold(s[:n], prefix)
 271 }
 272 
 273 // indexSpaces tries to find the first all-spaces slice in the string given;
 274 // the indices returned are negative when no spaces are found
 275 func indexSpaces(s []byte) (i, j int) {
 276     i = bytes.IndexByte(s, ' ')
 277     if i < 0 {
 278         return -1, -1
 279     }
 280 
 281     j = indexNonSpace(s[i:])
 282     if j < 0 {
 283         return i, len(s)
 284     }
 285 
 286     j += i
 287     return i, j
 288 }
 289 
 290 func indexNonSpace(s []byte) int {
 291     for i, b := range s {
 292         if b == ' ' {
 293             continue
 294         }
 295         return i
 296     }
 297 
 298     return -1
 299 }
 300 
 301 func junk(w *bufio.Writer, r io.Reader, n int) error {
 302     var buf [bufferSize]byte
 303 
 304     for n > 0 {
 305         got, err := rand.Read(buf[:])
 306         if err != nil {
 307             return err
 308         }
 309 
 310         if got > n {
 311             got = n
 312         }
 313         n -= got
 314 
 315         _, err = w.Write(buf[:got])
 316         if err = adaptWriteError(err); err != nil {
 317             return err
 318         }
 319     }
 320 
 321     return nil
 322 }
 323 
 324 // truncateRunes ensures byte-slices can't exceed the rune-count given, unless
 325 // it's negative; negative counts result in empty byte-slices
 326 func truncateRunes(s []byte, max int) []byte {
 327     if max < 1 {
 328         return nil
 329     }
 330 
 331     if len(s) < max {
 332         return s
 333     }
 334 
 335     i := 0
 336     for i < len(s) && max > 0 {
 337         _, size := utf8.DecodeRune(s[i:])
 338         i += size
 339         max--
 340     }
 341 
 342     return s[:i]
 343 }
 344 
 345 // reflowLine handles byte-slices for func reflow
 346 func reflowLine(w *bufio.Writer, line []byte, max int) {
 347     if len(line) < max {
 348         w.Write(line)
 349         return
 350     }
 351 
 352     n := 0
 353     spaces := 0
 354 
 355     for len(line) > 0 {
 356         i, j := indexSpaces(line)
 357         if i < 0 {
 358             pre := utf8.RuneCount(line)
 359             if n+spaces+pre > max {
 360                 w.WriteByte('\n')
 361                 n = 0
 362             }
 363 
 364             if !(n == 0 && spaces == 1) {
 365                 writeSpaces(w, spaces)
 366             }
 367             w.Write(line)
 368             return
 369         }
 370 
 371         pre := utf8.RuneCount(line[:i])
 372         nspaces := j - i
 373 
 374         if n+spaces+pre > max {
 375             w.WriteByte('\n')
 376             n = 0
 377         }
 378 
 379         if !(n == 0 && spaces == 1) {
 380             writeSpaces(w, spaces)
 381         }
 382 
 383         w.Write(line[:i])
 384         line = line[j:]
 385         n += spaces + pre
 386         spaces = nspaces
 387 
 388         if n >= max {
 389             w.WriteByte('\n')
 390             n = 0
 391         }
 392     }
 393 }
 394 
 395 func since(s, what []byte) []byte {
 396     if i := bytes.Index(s, what); i >= 0 {
 397         return s[i:]
 398     }
 399     return nil
 400 }
 401 
 402 func sinceLast(s, what []byte) []byte {
 403     if i := bytes.LastIndex(s, what); i >= 0 {
 404         return s[i:]
 405     }
 406     return nil
 407 }
 408 
 409 // squeezeChunk handles squeezing substrings between tabs for func squeeze
 410 func squeezeChunk(w *bufio.Writer, chunk []byte) {
 411     spaces := false
 412     chunk = trimSpaces(chunk)
 413 
 414     for len(chunk) > 0 {
 415         r, size := utf8.DecodeRune(chunk)
 416 
 417         if r == ' ' {
 418             spaces = true
 419             chunk = chunk[1:]
 420             continue
 421         }
 422 
 423         if spaces {
 424             w.WriteByte(' ')
 425             spaces = false
 426         }
 427 
 428         w.WriteRune(r)
 429         chunk = chunk[size:]
 430     }
 431 }
 432 
 433 func stringsTool(w *bufio.Writer, r io.Reader) error {
 434     ascii := false
 435     var buf [bufferSize]byte
 436 
 437     for {
 438         n, err := r.Read(buf[:])
 439         if n < 1 {
 440             if err == io.EOF {
 441                 err = nil
 442             }
 443             if ascii {
 444                 if err == nil {
 445                     return endLine(w)
 446                 }
 447                 endLine(w)
 448             }
 449             return err
 450         }
 451 
 452         for _, b := range buf[:n] {
 453             if isSymbolASCII[b] {
 454                 ascii = true
 455                 w.WriteByte(b)
 456                 continue
 457             }
 458 
 459             if ascii {
 460                 ascii = false
 461                 if err := endLine(w); err != nil {
 462                     return err
 463                 }
 464             }
 465         }
 466     }
 467 }
 468 
 469 func trimLead(line []byte) []byte {
 470     for len(line) > 0 && line[0] == ' ' {
 471         line = line[1:]
 472     }
 473     return line
 474 }
 475 
 476 func trimLeadWhitespace(line []byte) []byte {
 477     for len(line) > 0 {
 478         switch line[0] {
 479         case ' ', '\t', '\r', '\n':
 480             line = line[1:]
 481         default:
 482             return line
 483         }
 484     }
 485 
 486     return line
 487 }
 488 
 489 func trimSpaces(line []byte) []byte {
 490     return trimTrail(trimLead(line))
 491 }
 492 
 493 func trimTrail(line []byte) []byte {
 494     for len(line) > 0 && line[len(line)-1] == ' ' {
 495         line = line[:len(line)-1]
 496     }
 497     return line
 498 }
 499 
 500 func unHex(w *bufio.Writer, r io.Reader) error {
 501     dec := hex.NewDecoder(r)
 502     return copyBytes(w, dec)
 503 }
 504 
 505 func until(s, what []byte) []byte {
 506     if i := bytes.Index(s, what); i >= 0 {
 507         return s[:i+len(what)]
 508     }
 509     return s
 510 }
 511 
 512 func untilLast(s, what []byte) []byte {
 513     if i := bytes.LastIndex(s, what); i >= 0 {
 514         return s[:i+len(what)]
 515     }
 516     return s
 517 }
 518 
 519 // vulgarize turns Latin-1-encoded bytes into UTF-8 runes; the name is taken
 520 // from the word `vulgarization`, which refers to the historical evolution
 521 // from latin into various modern languages over the centuries
 522 func vulgarize(w *bufio.Writer, r io.Reader) error {
 523     var buf [16 * kb]byte
 524     br := bufio.NewReader(r)
 525 
 526     for {
 527         n, err := br.Read(buf[:])
 528         if n < 1 {
 529             if err == io.EOF {
 530                 return nil
 531             }
 532             return err
 533         }
 534 
 535         for _, b := range buf[:n] {
 536             r := latin1[b]
 537             if r == utf8.RuneError {
 538                 r = 0
 539             }
 540 
 541             _, err := w.WriteRune(r)
 542             if err := adaptWriteError(err); err != nil {
 543                 return err
 544             }
 545         }
 546     }
 547 }
 548 
 549 // hashBytes is a general checksum-calculator-runner of input bytes
 550 func hashBytes(w io.Writer, r io.Reader, h hash.Hash) error {
 551     if err := copyBytes(h, r); err != nil {
 552         return err
 553     }
 554 
 555     _, err := w.Write(h.Sum(nil))
 556     return adaptWriteError(err)
 557 }
 558 
 559 // loopFields is a no-allocation alterative to looping over the results of
 560 // func bytes.Fields
 561 func loopFields(s []byte, handle func(i int, s []byte) (keepGoing bool)) {
 562     // the presence of any tab determines the line as TSV
 563     if bytes.IndexByte(s, '\t') >= 0 {
 564         loopTSV(s, handle)
 565         return
 566     }
 567 
 568     // without any tabs, behave more like func bytes.Fields
 569     s = trimSpaces(s)
 570 
 571     for i := 0; len(s) > 0; i++ {
 572         start, stop := indexSpaces(s)
 573         if start < 0 {
 574             handle(i, s)
 575             break
 576         }
 577 
 578         if !handle(i, s[:start]) {
 579             break
 580         }
 581         s = s[stop:]
 582     }
 583 }
 584 
 585 func loopLinks(s []byte, handle func(i int, s []byte) (keepGoing bool)) {
 586     for i := 0; len(s) > 0; i++ {
 587         m := linksMatch.FindIndex(s)
 588         if m == nil {
 589             break
 590         }
 591 
 592         end := m[1]
 593         if s[end] == '.' {
 594             end--
 595         }
 596 
 597         if !handle(i, s[m[0]:end]) {
 598             break
 599         }
 600         s = s[end:]
 601     }
 602 }
 603 
 604 func loopNumbers(s []byte, handle func(i int, s []byte) (keepGoing bool)) {
 605     for i := 0; len(s) > 0; i++ {
 606         j, k := indexNumber(s)
 607         if j < 0 {
 608             break
 609         }
 610 
 611         if !handle(i, s[j:k]) {
 612             break
 613         }
 614         s = s[k:]
 615     }
 616 }
 617 
 618 // loopTSV is a no-allocation alterative to looping over the results of
 619 // func strings.Split
 620 func loopTSV(s []byte, handle func(i int, s []byte) (keepGoing bool)) {
 621     for i := 0; len(s) > 0; i++ {
 622         j := bytes.IndexByte(s, '\t')
 623         if j < 0 {
 624             handle(i, s)
 625             break
 626         }
 627 
 628         if !handle(i, s[:j]) {
 629             break
 630         }
 631         s = s[j+1:]
 632     }
 633 }
 634 
 635 func indexNumber(s []byte) (i, j int) {
 636     start := bytes.IndexAny(s, `+-0123456789`)
 637     if start < 0 {
 638         return -1, -1
 639     }
 640 
 641     prefix := 0
 642     switch s[start] {
 643     case '+', '-':
 644         prefix = 1
 645     }
 646 
 647     c := countLeadingDigits(s[start+prefix:])
 648     if c < 1 {
 649         return -1, -1
 650     }
 651 
 652     if start+prefix+c >= len(s) {
 653         return start + prefix, len(s)
 654     }
 655 
 656     if s[start+prefix+c] == '.' {
 657         decs := countLeadingDigits(s[start+prefix+c+1:])
 658         if decs < 1 {
 659             return start + prefix, start + prefix + c
 660         }
 661         return start + prefix, start + prefix + c + 1 + decs
 662     }
 663 
 664     return start + prefix, start + prefix + c
 665 }
 666 
 667 func countLeadingDigits(s []byte) int {
 668     n := 0
 669 
 670     for _, b := range s {
 671         switch b {
 672         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 673             n++
 674         default:
 675             return n
 676         }
 677     }
 678 
 679     return n
 680 }
 681 
 682 // unstyledWidth counts how many runes aren't part of ANSI-style sequences in
 683 // the string given
 684 func unstyledWidth(s []byte) int {
 685     // n counts the number of runes which aren't part of ANSI-style sequences
 686     n := 0
 687 
 688     for len(s) > 0 {
 689         i := bytes.Index(s, []byte{'\x1b', '['})
 690         if i < 0 {
 691             // no more ANSI-style sequences
 692             return n + utf8.RuneCount(s)
 693         }
 694 
 695         // update width-counter, and skip right past the ANSI-style intro
 696         n += utf8.RuneCount(s[:i])
 697         s = s[i:]
 698 
 699         i = indexEndANSI(s)
 700         if i < 0 {
 701             // no matching end of ANSI-sequence, so ignore rest of the string
 702             return n
 703         }
 704 
 705         // skip right past the next `m` byte
 706         s = s[i+1:]
 707     }
 708 
 709     return n
 710 }
 711 
 712 func indexEndANSI(s []byte) int {
 713     for i := 0; i < len(s); i++ {
 714         b := s[i]
 715 
 716         if 'A' <= b && b <= 'H' {
 717             return i
 718         }
 719 
 720         switch b {
 721         case 'J', 'K', 'S', 'T', 'm':
 722             return i
 723         }
 724     }
 725 
 726     // no match found
 727     return -1
 728 }

     File: tu/coby.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "io/fs"
   8     "os"
   9     "path/filepath"
  10     "runtime"
  11     "strconv"
  12     "sync"
  13 )
  14 
// cobyEvent has what the output-reporting task needs to show the results of a
// task which has just completed, perhaps unsuccessfully: func cobyHandleInputs
// sends one of these for each input, and func cobyHandleOutput receives them
type cobyEvent struct {
	// Index points to the task's entry in the results-slice
	Index int

	// Err is the completed task's error, or nil when the task succeeded
	Err error
}
  24 
  25 func coby(w *bufio.Writer, r io.Reader, args []string) error {
  26     // header is the first output line
  27     var header = []string{
  28         `name`,
  29         `bytes`,
  30         `runes`,
  31         `lines`,
  32         `lf`,
  33         `crlf`,
  34         `spaces`,
  35         `tabs`,
  36         `trails`,
  37         `nulls`,
  38         `fulls`,
  39         `highs`,
  40     }
  41 
  42     // show first/heading line right away, to let users know things are
  43     // happening
  44     for i, s := range header {
  45         if i > 0 {
  46             w.WriteByte('\t')
  47         }
  48         w.WriteString(s)
  49     }
  50     // assume an error means later stages/apps in a pipe had enough input and
  51     // quit successfully, so quit successfully too
  52     if err := endLine(w); err != nil {
  53         return err
  54     }
  55 
  56     // names has all filepaths given, ignoring repetitions
  57     names, ok := findAllFiles(unique(args))
  58     if !ok {
  59         return errGeneric
  60     }
  61     if len(names) == 0 {
  62         names = []string{`-`}
  63     }
  64 
  65     // results has all its items updated concurrently: this is safe to do,
  66     // as the tasks update values in separate indices of this slice, and
  67     // when an item is ready to show, its values aren't changing anymore
  68     results := make([]cobyStats, len(names))
  69 
  70     events := make(chan cobyEvent)
  71     go cobyHandleInputs(names, r, results, events)
  72     return cobyHandleOutput(w, results, events)
  73 }
  74 
  75 // cobyHandleInputs launches all the tasks which do the actual work, limiting
  76 // how many inputs are being worked on at the same time
  77 func cobyHandleInputs(names []string, r io.Reader, results []cobyStats, events chan cobyEvent) {
  78     // allow output-reporter task to end, and thus the app
  79     defer close(events)
  80 
  81     // permissions limits how many worker tasks can be active at the same
  82     // time: when given many filepaths to work on, rate-limiting avoids
  83     // a massive number of concurrent tasks which read and process input
  84     permissions := make(chan struct{}, runtime.NumCPU())
  85     defer close(permissions)
  86 
  87     var inputs sync.WaitGroup
  88     for i := range names {
  89         // wait until some concurrency-room is available
  90         permissions <- struct{}{}
  91         inputs.Add(1)
  92 
  93         go func(i int) {
  94             defer inputs.Done()
  95             err := cobyHandleInput(&results[i], names[i], r)
  96             events <- cobyEvent{i, err}
  97             <-permissions
  98         }(i)
  99     }
 100 
 101     // wait for all inputs, before closing the `events` channel
 102     inputs.Wait()
 103 }
 104 
 105 // cobyHandleInput handles each work-item for func cobyHandleInputs
 106 func cobyHandleInput(res *cobyStats, path string, r io.Reader) error {
 107     res.name = path
 108 
 109     if path == `-` {
 110         return res.updateStats(r)
 111     }
 112 
 113     f, err := os.Open(path)
 114     if err != nil {
 115         res.result = resultError
 116         // on windows, file-not-found error messages may mention `CreateFile`,
 117         // even when trying to open files in read-only mode
 118         return errors.New(`can't open file named ` + path)
 119     }
 120     defer f.Close()
 121 
 122     return res.updateStats(f)
 123 }
 124 
 125 // cobyHandleOutput asynchronously updates output as results are known, whether
 126 // it's errors or successful results; returns whether it succeeded, which
 127 // means no errors happened
 128 func cobyHandleOutput(w io.Writer, results []cobyStats, events chan cobyEvent) error {
 129     ok := true
 130     bw := bufio.NewWriter(w)
 131     defer bw.Flush()
 132 
 133     bw.Flush()
 134 
 135     // keep track of which tasks are over, so that on each event all leading
 136     // results which are ready are shown: all of this ensures prompt output
 137     // updates as soon as results come in, while keeping the original order
 138     // of the names/filepaths given
 139     resultsLeft := results
 140 
 141     for v := range events {
 142         if v.Err != nil {
 143             ok = false
 144             bw.Flush()
 145             showError(v.Err)
 146 
 147             // stay in the current loop, in case this failure was keeping
 148             // previous successes from showing up
 149         }
 150 
 151         n := countLeadingReady(resultsLeft)
 152 
 153         for _, res := range resultsLeft[:n] {
 154             if err := cobyShowResult(bw, res); err != nil {
 155                 // assume later stages/apps in a pipe had enough input and
 156                 // quit successfully, so quit successfully too
 157                 return nil
 158             }
 159         }
 160         resultsLeft = resultsLeft[n:]
 161 
 162         // flush output-buffer only if anything new was shown
 163         if n > 0 {
 164             bw.Flush()
 165         }
 166     }
 167 
 168     if ok {
 169         return nil
 170     }
 171     return errGeneric
 172 }
 173 
 174 // cobyShowResult does what it says
 175 func cobyShowResult(w *bufio.Writer, res cobyStats) error {
 176     if res.result == resultError {
 177         return nil
 178     }
 179 
 180     var buf [64]byte
 181     w.WriteString(res.name)
 182     w.Write([]byte{'\t'})
 183     w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10))
 184     w.Write([]byte{'\t'})
 185     w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10))
 186     w.Write([]byte{'\t'})
 187     w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10))
 188     w.Write([]byte{'\t'})
 189     w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10))
 190     w.Write([]byte{'\t'})
 191     w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10))
 192     w.Write([]byte{'\t'})
 193     w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10))
 194     w.Write([]byte{'\t'})
 195     w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10))
 196     w.Write([]byte{'\t'})
 197     w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10))
 198     w.Write([]byte{'\t'})
 199     w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10))
 200     w.Write([]byte{'\t'})
 201     w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10))
 202     w.Write([]byte{'\t'})
 203     w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10))
 204     _, err := w.Write([]byte{'\n'})
 205     return err
 206 }
 207 
 208 // unique ensures items only appear once in the result, keeping the original
 209 // slice unchanged
 210 func unique(src []string) []string {
 211     var unique []string
 212     got := make(map[string]struct{})
 213     for _, s := range src {
 214         if _, ok := got[s]; ok {
 215             continue
 216         }
 217         unique = append(unique, s)
 218         got[s] = struct{}{}
 219     }
 220     return unique
 221 }
 222 
 223 // findAllFiles does what it says, given a mix of file/folder paths, finding
 224 // all files recursively in the case of folders
 225 func findAllFiles(paths []string) (found []string, ok bool) {
 226     var unique []string
 227     got := make(map[string]struct{})
 228     ok = true
 229 
 230     for _, root := range paths {
 231         // a dash means standard input
 232         if root == `-` {
 233             if _, ok := got[root]; ok {
 234                 continue
 235             }
 236 
 237             unique = append(unique, root)
 238             got[root] = struct{}{}
 239             continue
 240         }
 241 
 242         _, err := os.Stat(root)
 243         if os.IsNotExist(err) {
 244             ok = false
 245             // on windows, file-not-found error messages may mention `CreateFile`,
 246             // even when trying to open files in read-only mode
 247             err := errors.New(`can't find file/folder named ` + root)
 248             showError(err)
 249             continue
 250         }
 251 
 252         err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
 253             if err != nil {
 254                 return err
 255             }
 256 
 257             if d.IsDir() {
 258                 return nil
 259             }
 260 
 261             if _, ok := got[path]; ok {
 262                 return nil
 263             }
 264 
 265             unique = append(unique, path)
 266             got[path] = struct{}{}
 267             return nil
 268         })
 269 
 270         if err != nil {
 271             ok = false
 272             showError(err)
 273         }
 274     }
 275 
 276     return unique, ok
 277 }
 278 
// isZero enables branchless-counting, when xor-compared bytes are used
// as indices for it: only its first item is 1, so looking up the xor of 2
// bytes gives 1 when these are the same, and 0 otherwise
var isZero = [256]byte{1}
 282 
// counter makes it easy to change the int-size of almost all counters: it's
// the type of all the counting fields in type cobyStats, except for `bytes`
type counter int
 285 
// statResult constrains possible result-states/values in type cobyStats
type statResult int
 288 
const (
	// resultPending is the default not-yet-ready result-status: being 0,
	// it's what zero-valued stats start with
	resultPending = statResult(0)

	// resultError signals result should show as an error, instead of data
	resultError = statResult(1)

	// resultSuccess means result can be shown
	resultSuccess = statResult(2)
)
 299 
// cobyStats has all the size-stats for some input, as well as a way to
// skip showing results, in case of an error such as `file not found`
type cobyStats struct {
	// bytes counts all bytes read
	bytes int

	// lines counts lines, and is 0 only when the byte-count is also 0
	lines counter

	// runes counts utf-8 sequences, each of which can use up to 4 bytes and
	// is usually a complete symbol: `emoji` country-flags are commonly-used
	// counter-examples, as each of these `symbols` needs 2 runes, using 8
	// bytes in total
	runes counter

	// maxWidth is maximum byte-width of lines, excluding carriage-returns
	// and/or line-feeds
	maxWidth counter

	// nulls counts all-bits-off bytes
	nulls counter

	// fulls counts all-bits-on bytes
	fulls counter

	// highs counts bytes with their `top` (highest-order) bit on
	highs counter

	// spaces counts ASCII spaces
	spaces counter

	// tabs counts ASCII tabs
	tabs counter

	// trailing counts lines with trailing spaces in them
	trailing counter

	// lf counts ASCII line-feeds as their own byte-values: this means its
	// value will always be at least the same as field `crlf`
	lf counter

	// crlf counts ASCII CRLF byte-pairs
	crlf counter

	// name is the filepath of the file/source these stats are about
	name string

	// result keeps track of whether results are valid and/or ready
	result statResult
}
 349 
 350 // updateStats does what it says, reading everything from a reader
 351 func (res *cobyStats) updateStats(r io.Reader) error {
 352     err := res.updateUsing(r)
 353     if err == io.EOF {
 354         err = nil
 355     }
 356 
 357     if err == nil {
 358         res.result = resultSuccess
 359     } else {
 360         res.result = resultError
 361     }
 362     return err
 363 }
 364 
 365 // updateUsing helps func updateStats do its job
 366 func (res *cobyStats) updateUsing(r io.Reader) error {
 367     var width counter
 368     var highRun int
 369     var prev1, prev2 byte
 370     var buf [16 * 1024]byte
 371     var tallies [256]uint64
 372 
 373     for {
 374         n, err := r.Read(buf[:])
 375         if n < 1 {
 376             if err == io.EOF {
 377                 res.tabs = counter(tallies['\t'])
 378                 res.spaces = counter(tallies[' '])
 379                 res.lf = counter(tallies['\n'])
 380                 res.nulls = counter(tallies[0])
 381                 res.fulls = counter(tallies[255])
 382                 for i := 128; i < 256; i++ {
 383                     res.highs += counter(tallies[i])
 384                 }
 385                 return res.handleEnd(width, prev1, highRun)
 386             }
 387             return err
 388         }
 389 
 390         res.bytes += n
 391         chunk := buf[:n]
 392 
 393         for _, b := range chunk {
 394             // count values without branching, because it's fun
 395             tallies[b]++
 396 
 397             // handle non-ASCII runes, assuming input is valid UTF-8
 398             if b >= 128 {
 399                 if highRun < 3 {
 400                     highRun++
 401                 } else {
 402                     highRun = 0
 403                     res.runes++
 404                     width++
 405                 }
 406 
 407                 prev2 = prev1
 408                 prev1 = b
 409                 continue
 410             }
 411 
 412             // handle line-feeds
 413             if b == '\n' {
 414                 res.lines++
 415 
 416                 crlf := count(prev1, '\r')
 417                 res.crlf += crlf
 418 
 419                 // count lines with trailing spaces, whether these end with
 420                 // a CRLF byte-pair or just a line-feed byte
 421                 res.trailing += count(prev1, ' ')
 422                 res.trailing += crlf & count(prev2, ' ')
 423 
 424                 // exclude any CR from the current line's width-count
 425                 width -= crlf
 426                 if res.maxWidth < width {
 427                     res.maxWidth = width
 428                 }
 429 
 430                 prev2 = prev1
 431                 prev1 = b
 432 
 433                 res.runes++
 434                 highRun = 0
 435                 width = 0
 436                 continue
 437             }
 438 
 439             prev2 = prev1
 440             prev1 = b
 441 
 442             res.runes++
 443             highRun = 0
 444             width++
 445         }
 446     }
 447 }
 448 
 449 // handleEnd fixes/finalizes stats when input data end; this func is only
 450 // meant to be used by func updateStats, since it takes some of the latter's
 451 // local variables
 452 func (res *cobyStats) handleEnd(width counter, prev1 byte, highRun int) error {
 453     if prev1 == ' ' {
 454         res.trailing++
 455     }
 456 
 457     if res.maxWidth < width {
 458         res.maxWidth = width
 459     }
 460 
 461     // avoid reporting 0 lines with a non-0 byte-count: this is unlike the
 462     // standard cmd-line tool `wc`
 463     if res.bytes > 0 && prev1 != '\n' {
 464         res.lines++
 465     }
 466 
 467     if highRun > 0 {
 468         res.runes++
 469     }
 470     return nil
 471 }
 472 
 473 // count checks if 2 bytes are the same, returning either 0 or 1, which can
 474 // be added directly/branchlessly to totals
 475 func count(x, y byte) counter {
 476     return counter(isZero[x^y])
 477 }
 478 
 479 // countLeadingReady finds how many items are ready to show at the start of a
 480 // results-slice, which ensures output matches the original item-order
 481 func countLeadingReady(values []cobyStats) int {
 482     for i, v := range values {
 483         if v.result == resultPending {
 484             return i
 485         }
 486     }
 487     return len(values)
 488 }

     File: tu/coby_test.go
   1 package main
   2 
   3 import (
   4     "strings"
   5     "testing"
   6 )
   7 
   8 func TestCount(t *testing.T) {
   9     for x := 0; x < 256; x++ {
  10         for y := 0; y < 256; y++ {
  11             var exp counter
  12             if x == y {
  13                 exp = 1
  14             }
  15 
  16             if got := count(byte(x), byte(y)); got != exp {
  17                 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got)
  18                 return
  19             }
  20         }
  21     }
  22 }
  23 
  24 func TestCountLeadingReady(t *testing.T) {
  25     for size := 0; size <= 20; size++ {
  26         for exp := 0; exp < size; exp++ {
  27             values := make([]cobyStats, size)
  28             for i := 0; i < exp; i++ {
  29                 v := resultSuccess
  30                 if i%2 == 1 {
  31                     v = resultError
  32                 }
  33                 values[i].result = v
  34             }
  35 
  36             if got := countLeadingReady(values); got != exp {
  37                 const fs = `size %d: expected %d, instead of %d`
  38                 t.Fatalf(fs, size, exp, got)
  39             }
  40         }
  41     }
  42 }
  43 
  44 func TestStats(t *testing.T) {
  45     var tests = []struct {
  46         Input    string
  47         Expected cobyStats
  48     }{
  49         {
  50             ``,
  51             cobyStats{},
  52         },
  53         {
  54             `abc`,
  55             cobyStats{lines: 1, runes: 3, maxWidth: 3},
  56         },
  57         {
  58             "abc\tdef\r\n",
  59             cobyStats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  60         },
  61         {
  62             "abc\tdef\r\n",
  63             cobyStats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  64         },
  65         {
  66             "abc\tdef \r\n123\t456  789 ",
  67             cobyStats{
  68                 lines: 2, runes: 23, maxWidth: 13,
  69                 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1,
  70             },
  71         },
  72     }
  73 
  74     for _, tc := range tests {
  75         t.Run(tc.Input, func(t *testing.T) {
  76             var got cobyStats
  77             err := got.updateStats(strings.NewReader(tc.Input))
  78             if err != nil {
  79                 t.Error(err)
  80                 return
  81             }
  82 
  83             tc.Expected.bytes = len(tc.Input)
  84             tc.Expected.result = resultSuccess
  85             if got != tc.Expected {
  86                 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got)
  87                 return
  88             }
  89         })
  90     }
  91 }

     File: tu/files.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "fmt"
   7     "io"
   8     "io/fs"
   9     "os"
  10     "path/filepath"
  11     "runtime"
  12     "sort"
  13     "strings"
  14     "sync"
  15 )
  16 
  17 // walkFolders is a general multi-folder recursion handler, which standardizes
  18 // such behavior across multiple filesystem-related tools
  19 func walkFolders(top []string, fn func(path string, info fs.DirEntry) error) error {
  20     if len(top) == 0 {
  21         return filepath.WalkDir(`.`, func(p string, d fs.DirEntry, e error) error {
  22             if e != nil {
  23                 return e
  24             }
  25             return fn(p, d)
  26         })
  27     }
  28 
  29     for _, path := range top {
  30         err := filepath.WalkDir(path, func(p string, d fs.DirEntry, e error) error {
  31             if e != nil {
  32                 return e
  33             }
  34             return fn(p, d)
  35         })
  36 
  37         if err != nil {
  38             return err
  39         }
  40     }
  41 
  42     return nil
  43 }
  44 
  45 // allFiles finds all files recursively, from the top-level folders given
  46 func allFiles(w *bufio.Writer, r io.Reader, names []string) error {
  47     return walkFolders(names, func(path string, info fs.DirEntry) error {
  48         if info.IsDir() {
  49             return nil
  50         }
  51         w.WriteString(unixSlashes(path))
  52         return endLine(w)
  53     })
  54 }
  55 
  56 // allFolders finds all folders recursively, from the top-level folders given
  57 func allFolders(w *bufio.Writer, r io.Reader, names []string) error {
  58     return walkFolders(names, func(path string, info fs.DirEntry) error {
  59         if !info.IsDir() {
  60             return nil
  61         }
  62 
  63         s := unixSlashes(path)
  64         w.WriteString(s)
  65         // ensure folder paths end with a slash
  66         if !strings.HasSuffix(s, `/`) {
  67             w.WriteString(`/`)
  68         }
  69         return endLine(w)
  70     })
  71 }
  72 
  73 // bigFiles finds all files which have at least the number of bytes given
  74 // recursively, from all the top-folders given
  75 func bigFiles(w *bufio.Writer, r io.Reader, args []string) error {
  76     if len(args) > 2 {
  77         return errors.New(`only up to 2 args are allowed`)
  78     }
  79 
  80     n, err := demandIntegerArg(args)
  81     if err != nil {
  82         return err
  83     }
  84     min := int64(n)
  85 
  86     return walkFolders(args[1:], func(path string, info fs.DirEntry) error {
  87         if info.IsDir() {
  88             return nil
  89         }
  90 
  91         st, err := info.Info()
  92         if err != nil {
  93             return err
  94         }
  95         if st.Size() < min {
  96             return nil
  97         }
  98 
  99         w.WriteString(unixSlashes(path))
 100         return endLine(w)
 101     })
 102 }
 103 
 104 // emptyFiles finds all empty files recursively, from all the top-folders given
 105 func emptyFiles(w *bufio.Writer, r io.Reader, names []string) error {
 106     return walkFolders(names, func(path string, info fs.DirEntry) error {
 107         if info.IsDir() {
 108             return nil
 109         }
 110 
 111         st, err := info.Info()
 112         if err != nil {
 113             return err
 114         }
 115         if st.Size() > 0 {
 116             return nil
 117         }
 118 
 119         w.WriteString(unixSlashes(path))
 120         return endLine(w)
 121     })
 122 }
 123 
 124 // emptyFolders finds all empty folders recursively, from all the top-folders
 125 // given
 126 func emptyFolders(w *bufio.Writer, r io.Reader, names []string) error {
 127     return walkFolders(names, func(path string, info fs.DirEntry) error {
 128         if !info.IsDir() {
 129             return nil
 130         }
 131 
 132         ent, err := os.ReadDir(path)
 133         if err != nil {
 134             return err
 135         }
 136 
 137         if len(ent) > 0 {
 138             return nil
 139         }
 140 
 141         w.WriteString(unixSlashes(path))
 142         return endLine(w)
 143     })
 144 }
 145 
 146 // groupByFileSize does what it says, emitting paragraphs, where each first
 147 // line is the file-size, followed by all file-paths of that size
 148 func groupByFileSize(w *bufio.Writer, r io.Reader, names []string) error {
 149     bySize := make(map[int64][]string)
 150 
 151     err := walkFolders(names, func(path string, info fs.DirEntry) error {
 152         if info.IsDir() {
 153             return nil
 154         }
 155 
 156         st, err := info.Info()
 157         if err != nil {
 158             return err
 159         }
 160 
 161         n := st.Size()
 162         bySize[n] = append(bySize[n], path)
 163         return nil
 164     })
 165 
 166     if err != nil {
 167         return err
 168     }
 169 
 170     // reverse-sorted keys
 171     sizes := make([]int64, 0, len(bySize))
 172     for k := range bySize {
 173         sizes = append(sizes, k)
 174     }
 175     sort.Slice(sizes, func(i, j int) bool {
 176         return sizes[i] > sizes[j]
 177     })
 178 
 179     for i, sz := range sizes {
 180         if i > 0 {
 181             err := endLine(w)
 182             if err != nil {
 183                 return err
 184             }
 185         }
 186 
 187         writeInt64(w, sz)
 188         if err := endLine(w); err != nil {
 189             return err
 190         }
 191 
 192         names := bySize[sz]
 193         sort.Strings(names)
 194 
 195         for _, s := range names {
 196             w.WriteString(unixSlashes(s))
 197             if err := endLine(w); err != nil {
 198                 return err
 199             }
 200         }
 201     }
 202 
 203     return nil
 204 }
 205 
 206 // smallFiles finds all files which have fewer than the number of bytes given
 207 // recursively, from all the top-folders given
 208 func smallFiles(w *bufio.Writer, r io.Reader, args []string) error {
 209     if len(args) > 2 {
 210         return errors.New(`only up to 2 args are allowed`)
 211     }
 212 
 213     n, err := demandIntegerArg(args)
 214     if err != nil {
 215         return err
 216     }
 217     maxp1 := int64(n)
 218 
 219     return walkFolders(args[1:], func(path string, info fs.DirEntry) error {
 220         if info.IsDir() {
 221             return nil
 222         }
 223 
 224         st, err := info.Info()
 225         if err != nil {
 226             return err
 227         }
 228         if st.Size() >= maxp1 {
 229             return nil
 230         }
 231 
 232         w.WriteString(unixSlashes(path))
 233         return endLine(w)
 234     })
 235 }
 236 
 237 // topFiles finds all top-level files, from the folders given
 238 func topFiles(w *bufio.Writer, r io.Reader, names []string) error {
 239     for _, path := range names {
 240         entries, err := os.ReadDir(path)
 241         if err != nil {
 242             return err
 243         }
 244 
 245         // ensure path is usable for unix-style full-path output
 246         path = strings.TrimSuffix(path, `/`)
 247         path = strings.TrimSuffix(path, `\`)
 248         path = unixSlashes(path)
 249 
 250         for _, e := range entries {
 251             if e.IsDir() {
 252                 continue
 253             }
 254 
 255             w.WriteString(path)
 256             w.WriteString(`/`)
 257             w.WriteString(e.Name())
 258             err := endLine(w)
 259             if err != nil {
 260                 return err
 261             }
 262         }
 263     }
 264 
 265     return nil
 266 }
 267 
 268 // topFolders finds all top-level folders, from the folders given
 269 func topFolders(w *bufio.Writer, r io.Reader, names []string) error {
 270     for _, path := range names {
 271         entries, err := os.ReadDir(path)
 272         if err != nil {
 273             return err
 274         }
 275 
 276         // ensure path is usable for unix-style full-path output
 277         path = strings.TrimSuffix(path, `/`)
 278         path = strings.TrimSuffix(path, `\`)
 279         path = unixSlashes(path)
 280 
 281         for _, e := range entries {
 282             if !e.IsDir() {
 283                 continue
 284             }
 285 
 286             w.WriteString(path)
 287             w.WriteString(`/`)
 288             w.WriteString(e.Name())
 289             err := endLine(w)
 290             if err != nil {
 291                 return err
 292             }
 293         }
 294     }
 295 
 296     return nil
 297 }
 298 
 299 // sourceStats keeps track of all sorts of filetype-related source-code info
 300 type sourceStats struct {
 301     // files counts how many files were used for these stats
 302     files int
 303 
 304     // bytes counts bytes for all files
 305     bytes int64
 306 
 307     // lines counts all lines
 308     lines int
 309 
 310     // code counts non-empty non-comment lines
 311     code int
 312 
 313     // empty counts empty lines
 314     empty int
 315 
 316     // comments counts lines part of multi-line comments, as well
 317     // as lines which are just single-line comments
 318     comments int
 319 }
 320 
 321 // sourceSettings is used to deal with source-code-related stats, and holds
 322 // all sorts of looked-up info about filetypes, as part of an internal table
 323 type sourceSettings struct {
 324     // kind is the full language name, as opposed to its file extension
 325     kind string
 326 
 327     // lineComment is the string which starts a comment lasting for the rest
 328     // of its line
 329     lineComment string
 330 
 331     // startComment is the string which starts a potentially multi-line comment
 332     startComment string
 333 
 334     // endComment is what ends a potentially multi-line comment
 335     endComment string
 336 }
 337 
 338 // update handles a source-code input line, and must be given a pointer to a
 339 // boolean which keeps track of multi-line-comment status across input lines
 340 func (stats *sourceStats) update(line string, ss sourceSettings, mlComment *bool) error {
 341     // count all lines in general
 342     stats.lines++
 343 
 344     // handle she-bang-style first lines for `commentable` file types
 345     if stats.lines == 1 && ss.lineComment != `` && strings.HasPrefix(line, `#!`) {
 346         stats.comments++
 347         return nil
 348     }
 349 
 350     // handle rest of multi-line comments
 351     if *mlComment {
 352         stats.comments++
 353         if strings.Contains(line, ss.endComment) {
 354             *mlComment = false
 355         }
 356         return nil
 357     }
 358 
 359     tr := strings.TrimSpace(line)
 360 
 361     // count empty(ish) lines
 362     if tr == `` {
 363         stats.empty++
 364         return nil
 365     }
 366 
 367     // handle lines with just single-line comments on them
 368     if ss.lineComment != `` && strings.HasPrefix(tr, ss.lineComment) {
 369         stats.comments++
 370         return nil
 371     }
 372 
 373     // handle the start of multi-line comments, and perhaps their
 374     // end, when that's on the same line
 375     if ss.startComment != `` && strings.HasPrefix(tr, ss.startComment) {
 376         stats.comments++
 377         *mlComment = true
 378         if strings.Contains(tr, ss.endComment) {
 379             *mlComment = false
 380         }
 381         return nil
 382     }
 383 
 384     // count regular lines of code
 385     stats.code++
 386     return nil
 387 }
 388 
 389 // add updates existing source-stats results with those from the next file
 390 func (ss sourceStats) add(v sourceStats) sourceStats {
 391     ss.files++
 392     ss.bytes += v.bytes
 393     ss.lines += v.lines
 394     ss.code += v.code
 395     ss.empty += v.empty
 396     ss.comments += v.comments
 397     return ss
 398 }
 399 
// ext2set is the internal table of supported filetypes, which maps lowercase
// file extensions (leading dots included) to their settings: values give the
// filetype's name, its line-comment marker, and its multi-line comment start
// and end markers, in that order; empty markers mean the filetype isn't
// handled as having that kind of comment
var ext2set = map[string]sourceSettings{
    `.awk`:  {`awk`, `#`, ``, ``},
    `.bash`: {`shell`, `#`, ``, ``},
    `.c`:    {`c/c++`, `//`, `/*`, `*/`},
    `.cc`:   {`c/c++`, `//`, `/*`, `*/`},
    `.cpp`:  {`c/c++`, `//`, `/*`, `*/`},
    `.css`:  {`css`, ``, `/*`, `*/`},
    `.csv`:  {`csv`, ``, ``, ``},
    `.cxx`:  {`c/c++`, `//`, `/*`, `*/`},
    `.go`:   {`go`, `//`, `/*`, `*/`},
    `.h`:    {`c/c++`, `//`, `/*`, `*/`},
    `.hh`:   {`c/c++`, `//`, `/*`, `*/`},
    `.hpp`:  {`c/c++`, `//`, `/*`, `*/`},
    `.htm`:  {`html`, ``, `<!--`, `-->`},
    `.html`: {`html`, ``, `<!--`, `-->`},
    `.hxx`:  {`c/c++`, `//`, `/*`, `*/`},
    `.java`: {`java`, `//`, `/*`, `*/`},
    `.js`:   {`javascript`, `//`, `/*`, `*/`},
    `.json`: {`json`, `//`, ``, ``},
    `.lua`:  {`lua`, `--`, ``, ``},
    `.md`:   {`markdown`, ``, ``, ``},
    `.py`:   {`python`, `#`, ``, ``},
    `.pyw`:  {`python`, `#`, ``, ``},
    `.rb`:   {`ruby`, `#`, ``, ``},
    `.rs`:   {`rust`, `//`, `/*`, `*/`},
    `.sh`:   {`shell`, `#`, ``, ``},
    `.svg`:  {`svg`, ``, `<!--`, `-->`},
    `.text`: {`text`, ``, ``, ``},
    `.ts`:   {`typescript`, `//`, `/*`, `*/`},
    `.tsv`:  {`tsv`, ``, ``, ``},
    `.txt`:  {`text`, ``, ``, ``},
}
 432 
 433 // locTool directly implements the `loc` tool
 434 func locTool(w *bufio.Writer, r io.Reader, names []string) error {
 435     if len(names) == 0 {
 436         names = []string{`.`}
 437     }
 438 
 439     var mut sync.Mutex
 440     res := make(map[string]sourceStats)
 441 
 442     max := runtime.NumCPU()
 443     permissions := make(chan struct{}, max)
 444     defer close(permissions)
 445 
 446     err := walkFolders(names, func(path string, info fs.DirEntry) error {
 447         ext := filepath.Ext(path)
 448         ext = strings.ToLower(ext)
 449         ss, ok := ext2set[ext]
 450         if !ok {
 451             return nil
 452         }
 453 
 454         permissions <- struct{}{}
 455         defer func() { <-permissions }()
 456         stats, err := locFile(path)
 457         if err != nil {
 458             return err
 459         }
 460 
 461         // update stats for the filetype
 462         mut.Lock()
 463         defer mut.Unlock()
 464         res[ss.kind] = res[ss.kind].add(stats)
 465         return nil
 466     })
 467 
 468     if err != nil {
 469         return err
 470     }
 471 
 472     keys := make([]string, 0, len(res))
 473     for k := range res {
 474         keys = append(keys, k)
 475     }
 476 
 477     // reverse-sort keys by total lines, from most to least
 478     sort.SliceStable(keys, func(i, j int) bool {
 479         return res[keys[i]].lines > res[keys[j]].lines
 480     })
 481 
 482     // emit results as TSV lines
 483     w.WriteString("type\tfiles\tlines\tcode\tcomments\tempty\tbytes")
 484     if err := endLine(w); err != nil {
 485         return err
 486     }
 487 
 488     for _, k := range keys {
 489         v := res[k]
 490         fmt.Fprintf(w, "%s\t%d\t%d\t%d\t%d\t%d\t%d",
 491             k, v.files, v.lines, v.code, v.comments, v.empty, v.bytes)
 492 
 493         if err := endLine(w); err != nil {
 494             return err
 495         }
 496     }
 497 
 498     return nil
 499 }
 500 
 501 func locFile(path string) (sourceStats, error) {
 502     var stats sourceStats
 503 
 504     // find filetype, avoiding files without a supported extension
 505     ext := filepath.Ext(path)
 506     ext = strings.ToLower(ext)
 507     ss, ok := ext2set[ext]
 508     if !ok {
 509         return stats, nil
 510     }
 511 
 512     // get the file-size ready to use in the result stats
 513     st, err := os.Stat(path)
 514     if err != nil {
 515         return stats, err
 516     }
 517 
 518     // gather source-code-related stats
 519     mlComment := false
 520     stats.bytes = st.Size()
 521     err = handleNamedInput(path, nil, func(r io.Reader) error {
 522         return loopLinesString(r, func(line string) error {
 523             return stats.update(line, ss, &mlComment)
 524         })
 525     })
 526     return stats, err
 527 }

     File: tu/filetypes.go
   1 package main
   2 
   3 import "bytes"
   4 
// all the MIME types used/recognized in this package, named after their
// most common file extensions: some names share the same MIME type, such
// as htm/html and jpg/jpeg, while iso uses the generic type for bytes
const (
    aiff    = `audio/aiff`
    au      = `audio/basic`
    avi     = `video/avi`
    avif    = `image/avif`
    bmp     = `image/x-bmp`
    caf     = `audio/x-caf`
    cur     = `image/vnd.microsoft.icon`
    css     = `text/css`
    csv_    = `text/csv`
    djvu    = `image/x-djvu`
    elf     = `application/x-elf`
    exe     = `application/vnd.microsoft.portable-executable`
    flac    = `audio/x-flac`
    gif     = `image/gif`
    gz      = `application/gzip`
    heic    = `image/heic`
    htm     = `text/html`
    html    = `text/html`
    ico     = `image/x-icon`
    iso     = `application/octet-stream`
    jpg     = `image/jpeg`
    jpeg    = `image/jpeg`
    js      = `application/javascript`
    json    = `application/json`
    m4a     = `audio/aac`
    m4v     = `video/x-m4v`
    mid     = `audio/midi`
    mov     = `video/quicktime`
    mp4     = `video/mp4`
    mp3     = `audio/mpeg`
    mpg     = `video/mpeg`
    ogg     = `audio/ogg`
    opus    = `audio/opus`
    pdf     = `application/pdf`
    png     = `image/png`
    ps      = `application/postscript`
    psd     = `image/vnd.adobe.photoshop`
    rtf     = `application/rtf`
    sqlite3 = `application/x-sqlite3`
    svg     = `image/svg+xml`
    text    = `text/plain`
    tiff    = `image/tiff`
    tsv     = `text/tsv`
    wasm    = `application/wasm`
    wav     = `audio/x-wav`
    webp    = `image/webp`
    webm    = `video/webm`
    xml     = `application/xml`
    zip     = `application/zip`
    zst     = `application/zstd`
)
  58 
// formatDescriptor ties a file-header pattern to its data-format type
type formatDescriptor struct {
    // Header is the byte pattern for the format: items with the same value
    // as const cba are any-byte markers, instead of literal bytes
    Header []byte

    // Type is the MIME type for the format
    Type string
}
  64 
// cba is the `can be anything` marker, used in header patterns to stand for
// any byte: ensure this value differs from all other literal bytes in the
// generic-headers table: failing that, its value could cause subtle
// type-misdetection bugs
//
// NOTE(review): the zstd header in table hdrDispatch ends with a literal
// 0xFD, which is the same value as this marker, so that byte presumably
// matches anything, instead of just 0xFD; the matching func isn't shown
// here, so confirm this, and consider using a value no header has
const cba = 0xFD // 253, which is > 127, the highest-valued ascii symbol
  69 
// dash-streamed m4a format: the pattern starts with 4 any-byte markers,
// followed by `ftypdash`, 4 zero bytes, and `iso6mp41`
var m4aDash = []byte{
    cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
    000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
}
  75 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice; all patterns start with 4 any-byte
// markers followed by `ftyp`, which is then followed by a type-specific tag
var specialHeaders = []formatDescriptor{
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    {m4aDash, m4a},
}
  91 
  92 // sqlite3 database format
  93 var sqlite3db = []byte{
  94     'S', 'Q', 'L', 'i', 't', 'e', ' ',
  95     'f', 'o', 'r', 'm', 'a', 't', ' ', '3',
  96     000,
  97 }
  98 
// windows-variant bitmap file-header, which is followed by a byte-counter for
// the 40-byte infoheader which follows that: the 12 any-byte markers after
// the leading `BM` stand for the rest of the 14-byte file-header, while the
// final 40 is the first byte of that byte-counter
var winbmp = []byte{
    'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40,
}
 104 
// deja-vu document format: the pattern is `AT&TFORM`, followed by 4 any-byte
// markers, and then by `DJV`
var djv = []byte{
    'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V',
}
 109 
 110 var doctypeHTML = []byte{
 111     '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', '>',
 112 }
 113 
 114 // hdrDispatch groups format-description-groups by their first byte, thus
 115 // shortening total lookups for some data header: notice how the `ftyp` data
 116 // formats aren't handled here, since these can start with any byte, instead
 117 // of the literal value of the any-byte markers they use
 118 var hdrDispatch = [256][]formatDescriptor{
 119     {
 120         {[]byte{000, 000, 001, 0xBA}, mpg},
 121         {[]byte{000, 000, 001, 0xB3}, mpg},
 122         {[]byte{000, 000, 001, 000}, ico},
 123         {[]byte{000, 000, 002, 000}, cur},
 124         {[]byte{000, 'a', 's', 'm'}, wasm},
 125     }, // 0
 126     nil, // 1
 127     nil, // 2
 128     nil, // 3
 129     nil, // 4
 130     nil, // 5
 131     nil, // 6
 132     nil, // 7
 133     nil, // 8
 134     nil, // 9
 135     nil, // 10
 136     nil, // 11
 137     nil, // 12
 138     nil, // 13
 139     nil, // 14
 140     nil, // 15
 141     nil, // 16
 142     nil, // 17
 143     nil, // 18
 144     nil, // 19
 145     nil, // 20
 146     nil, // 21
 147     nil, // 22
 148     nil, // 23
 149     nil, // 24
 150     nil, // 25
 151     {
 152         {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm},
 153     }, // 26
 154     nil, // 27
 155     nil, // 28
 156     nil, // 29
 157     nil, // 30
 158     {
 159         // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz},
 160         {[]byte{0x1F, 0x8B, 0x08}, gz},
 161     }, // 31
 162     nil, // 32
 163     nil, // 33 !
 164     nil, // 34 "
 165     {
 166         {[]byte{'#', '!', ' '}, text},
 167         {[]byte{'#', '!', '/'}, text},
 168     }, // 35 #
 169     nil, // 36 $
 170     {
 171         {[]byte{'%', 'P', 'D', 'F'}, pdf},
 172         {[]byte{'%', '!', 'P', 'S'}, ps},
 173     }, // 37 %
 174     nil, // 38 &
 175     nil, // 39 '
 176     {
 177         {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst},
 178     }, // 40 (
 179     nil, // 41 )
 180     nil, // 42 *
 181     nil, // 43 +
 182     nil, // 44 ,
 183     nil, // 45 -
 184     {
 185         {[]byte{'.', 's', 'n', 'd'}, au},
 186     }, // 46 .
 187     nil, // 47 /
 188     nil, // 48 0
 189     nil, // 49 1
 190     nil, // 50 2
 191     nil, // 51 3
 192     nil, // 52 4
 193     nil, // 53 5
 194     nil, // 54 6
 195     nil, // 55 7
 196     {
 197         {[]byte{'8', 'B', 'P', 'S'}, psd},
 198     }, // 56 8
 199     nil, // 57 9
 200     nil, // 58 :
 201     nil, // 59 ;
 202     {
 203         // func checkDoc is better for these, since it's case-insensitive
 204         {doctypeHTML, html},
 205         {[]byte{'<', 's', 'v', 'g'}, svg},
 206         {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html},
 207         {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html},
 208         {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html},
 209         {[]byte{'<', '?', 'x', 'm', 'l'}, xml},
 210     }, // 60 <
 211     nil, // 61 =
 212     nil, // 62 >
 213     nil, // 63 ?
 214     nil, // 64 @
 215     {
 216         {djv, djvu},
 217     }, // 65 A
 218     {
 219         {winbmp, bmp},
 220     }, // 66 B
 221     nil, // 67 C
 222     nil, // 68 D
 223     nil, // 69 E
 224     {
 225         {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
 226         {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
 227     }, // 70 F
 228     {
 229         {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif},
 230         {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif},
 231     }, // 71 G
 232     nil, // 72 H
 233     {
 234         {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata
 235         {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata
 236         {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata
 237         {[]byte{'I', 'I', '*', 000}, tiff},
 238     }, // 73 I
 239     nil, // 74 J
 240     nil, // 75 K
 241     nil, // 76 L
 242     {
 243         {[]byte{'M', 'M', 000, '*'}, tiff},
 244         {[]byte{'M', 'T', 'h', 'd'}, mid},
 245         {[]byte{'M', 'Z', cba, 000, cba, 000}, exe},
 246         // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe},
 247         // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe},
 248         // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe},
 249     }, // 77 M
 250     nil, // 78 N
 251     {
 252         {[]byte{'O', 'g', 'g', 'S'}, ogg},
 253     }, // 79 O
 254     {
 255         {[]byte{'P', 'K', 003, 004}, zip},
 256     }, // 80 P
 257     nil, // 81 Q
 258     {
 259         {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
 260         {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
 261         {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
 262     }, // 82 R
 263     {
 264         {sqlite3db, sqlite3},
 265     }, // 83 S
 266     nil, // 84 T
 267     nil, // 85 U
 268     nil, // 86 V
 269     nil, // 87 W
 270     nil, // 88 X
 271     nil, // 89 Y
 272     nil, // 90 Z
 273     nil, // 91 [
 274     nil, // 92 \
 275     nil, // 93 ]
 276     nil, // 94 ^
 277     nil, // 95 _
 278     nil, // 96 `
 279     nil, // 97 a
 280     nil, // 98 b
 281     {
 282         {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
 283     }, // 99 c
 284     nil, // 100 d
 285     nil, // 101 e
 286     {
 287         {[]byte{'f', 'L', 'a', 'C'}, flac},
 288     }, // 102 f
 289     nil, // 103 g
 290     nil, // 104 h
 291     nil, // 105 i
 292     nil, // 106 j
 293     nil, // 107 k
 294     nil, // 108 l
 295     nil, // 109 m
 296     nil, // 110 n
 297     nil, // 111 o
 298     nil, // 112 p
 299     nil, // 113 q
 300     nil, // 114 r
 301     nil, // 115 s
 302     nil, // 116 t
 303     nil, // 117 u
 304     nil, // 118 v
 305     nil, // 119 w
 306     nil, // 120 x
 307     nil, // 121 y
 308     nil, // 122 z
 309     {
 310         {[]byte{'{', '\\', 'r', 't', 'f'}, rtf},
 311     }, // 123 {
 312     nil, // 124 |
 313     nil, // 125 }
 314     nil, // 126
 315     {
 316         {[]byte{127, 'E', 'L', 'F'}, elf},
 317     }, // 127
 318     nil, // 128
 319     nil, // 129
 320     nil, // 130
 321     nil, // 131
 322     nil, // 132
 323     nil, // 133
 324     nil, // 134
 325     nil, // 135
 326     nil, // 136
 327     {
 328         {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
 329     }, // 137
 330     nil, // 138
 331     nil, // 139
 332     nil, // 140
 333     nil, // 141
 334     nil, // 142
 335     nil, // 143
 336     nil, // 144
 337     nil, // 145
 338     nil, // 146
 339     nil, // 147
 340     nil, // 148
 341     nil, // 149
 342     nil, // 150
 343     nil, // 151
 344     nil, // 152
 345     nil, // 153
 346     nil, // 154
 347     nil, // 155
 348     nil, // 156
 349     nil, // 157
 350     nil, // 158
 351     nil, // 159
 352     nil, // 160
 353     nil, // 161
 354     nil, // 162
 355     nil, // 163
 356     nil, // 164
 357     nil, // 165
 358     nil, // 166
 359     nil, // 167
 360     nil, // 168
 361     nil, // 169
 362     nil, // 170
 363     nil, // 171
 364     nil, // 172
 365     nil, // 173
 366     nil, // 174
 367     nil, // 175
 368     nil, // 176
 369     nil, // 177
 370     nil, // 178
 371     nil, // 179
 372     nil, // 180
 373     nil, // 181
 374     nil, // 182
 375     nil, // 183
 376     nil, // 184
 377     nil, // 185
 378     nil, // 186
 379     nil, // 187
 380     nil, // 188
 381     nil, // 189
 382     nil, // 190
 383     nil, // 191
 384     nil, // 192
 385     nil, // 193
 386     nil, // 194
 387     nil, // 195
 388     nil, // 196
 389     nil, // 197
 390     nil, // 198
 391     nil, // 199
 392     nil, // 200
 393     nil, // 201
 394     nil, // 202
 395     nil, // 203
 396     nil, // 204
 397     nil, // 205
 398     nil, // 206
 399     nil, // 207
 400     nil, // 208
 401     nil, // 209
 402     nil, // 210
 403     nil, // 211
 404     nil, // 212
 405     nil, // 213
 406     nil, // 214
 407     nil, // 215
 408     nil, // 216
 409     nil, // 217
 410     nil, // 218
 411     nil, // 219
 412     nil, // 220
 413     nil, // 221
 414     nil, // 222
 415     nil, // 223
 416     nil, // 224
 417     nil, // 225
 418     nil, // 226
 419     nil, // 227
 420     nil, // 228
 421     nil, // 229
 422     nil, // 230
 423     nil, // 231
 424     nil, // 232
 425     nil, // 233
 426     nil, // 234
 427     nil, // 235
 428     nil, // 236
 429     nil, // 237
 430     nil, // 238
 431     nil, // 239
 432     nil, // 240
 433     nil, // 241
 434     nil, // 242
 435     nil, // 243
 436     nil, // 244
 437     nil, // 245
 438     nil, // 246
 439     nil, // 247
 440     nil, // 248
 441     nil, // 249
 442     nil, // 250
 443     nil, // 251
 444     nil, // 252
 445     nil, // 253
 446     nil, // 254
 447     {
 448         {[]byte{0xFF, 0xD8, 0xFF}, jpg},
 449         {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
 450         {[]byte{0xFF, 0xFB}, mp3},
 451     }, // 255
 452 }
 453 
 454 // detectMIME guesses the first appropriate MIME type from the first few
 455 // data bytes given: 24 bytes are enough to detect all supported types
 456 func detectMIME(b []byte) (mimeType string, ok bool) {
 457     t, ok := detectType(b)
 458     if ok {
 459         return t, true
 460     }
 461     return ``, false
 462 }
 463 
 464 // detectType guesses the first appropriate file type for the data given:
 465 // here the type is a a filename extension without the leading dot
 466 func detectType(b []byte) (dotlessExt string, ok bool) {
 467     // empty data, so there's no way to detect anything
 468     if len(b) == 0 {
 469         return ``, false
 470     }
 471 
 472     // check for plain-text web-document formats case-insensitively
 473     kind, ok := checkDoc(b)
 474     if ok {
 475         return kind, true
 476     }
 477 
 478     // check data formats which allow any byte at the start
 479     kind, ok = checkSpecial(b)
 480     if ok {
 481         return kind, true
 482     }
 483 
 484     // check all other supported data formats
 485     headers := hdrDispatch[b[0]]
 486     for _, t := range headers {
 487         if hasPrefixPattern(b[1:], t.Header[1:], cba) {
 488             return t.Type, true
 489         }
 490     }
 491 
 492     // unrecognized data format
 493     return ``, false
 494 }
 495 
 496 // checkDoc tries to guess if the bytes given are the start of HTML, SVG,
 497 // XML, or JSON data
 498 func checkDoc(b []byte) (kind string, ok bool) {
 499     // ignore leading whitespaces
 500     b = trimLeadWhitespace(b)
 501 
 502     // can't detect anything with empty data
 503     if len(b) == 0 {
 504         return ``, false
 505     }
 506 
 507     // handle HTML/SVG/XML documents
 508     if hasPrefixByte(b, '<') {
 509         if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) {
 510             if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) {
 511                 return svg, true
 512             }
 513             return xml, true
 514         }
 515 
 516         headers := hdrDispatch['<']
 517         for _, v := range headers {
 518             if hasPrefixFold(b, v.Header) {
 519                 return v.Type, true
 520             }
 521         }
 522         return ``, false
 523     }
 524 
 525     // handle JSON with top-level arrays
 526     if hasPrefixByte(b, '[') {
 527         // match [", or [[, or [{, ignoring spaces between
 528         b = trimLeadWhitespace(b[1:])
 529         if len(b) > 0 {
 530             switch b[0] {
 531             case '"', '[', '{':
 532                 return json, true
 533             }
 534         }
 535         return ``, false
 536     }
 537 
 538     // handle JSON with top-level objects
 539     if hasPrefixByte(b, '{') {
 540         // match {", ignoring spaces between: after {, the only valid syntax
 541         // which can follow is the opening quote for the expected object-key
 542         b = trimLeadWhitespace(b[1:])
 543         if hasPrefixByte(b, '"') {
 544             return json, true
 545         }
 546         return ``, false
 547     }
 548 
 549     // checking for a quoted string, any of the JSON keywords, or even a
 550     // number seems too ambiguous to declare the data valid JSON
 551 
 552     // no web-document format detected
 553     return ``, false
 554 }
 555 
 556 // checkSpecial handles special file-format headers, which should be checked
 557 // before the normal file-type headers, since the first-byte dispatch algo
 558 // doesn't work for these
 559 func checkSpecial(b []byte) (kind string, ok bool) {
 560     if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 {
 561         for _, t := range specialHeaders {
 562             if hasPrefixPattern(b[4:], t.Header[4:], cba) {
 563                 return t.Type, true
 564             }
 565         }
 566     }
 567     return ``, false
 568 }
 569 
 570 // hasPrefixPattern works like bytes.HasPrefix, except it allows for a special
 571 // value to signal any byte is allowed on specific spots
 572 func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool {
 573     // if the data are shorter than the pattern to match, there's no match
 574     if len(what) < len(pat) {
 575         return false
 576     }
 577 
 578     // use a slice which ensures the pattern length is never exceeded
 579     what = what[:len(pat)]
 580 
 581     for i, x := range what {
 582         y := pat[i]
 583         if x != y && y != wildcard {
 584             return false
 585         }
 586     }
 587     return true
 588 }

     File: tu/filetypes_test.go
   1 package main
   2 
   3 import (
   4     "bytes"
   5     "strconv"
   6     "testing"
   7 )
   8 
   9 func TestData(t *testing.T) {
  10     t.Run(`could-be-anything constant`, func(t *testing.T) {
  11         if len(hdrDispatch[cba]) != 0 {
  12             const fs = `chosen constant %d collides with header entries`
  13             t.Fatalf(fs, cba)
  14         }
  15     })
  16 
  17     for i, v := range hdrDispatch {
  18         t.Run(`dispatch @ `+strconv.Itoa(i), func(t *testing.T) {
  19             const fs = `expected leading byte to be %d, but got %d instead`
  20             for _, e := range v {
  21                 if e.Header[0] != byte(i) {
  22                     t.Fatalf(fs, i, e.Header[0])
  23                     return
  24                 }
  25             }
  26         })
  27     }
  28 }
  29 
  30 func TestCheckDoc(t *testing.T) {
  31     const (
  32         lf       = "\n"
  33         crlf     = "\r\n"
  34         tab      = "\t"
  35         xmlIntro = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`
  36     )
  37 
  38     tests := []struct {
  39         Input    string
  40         Expected string
  41     }{
  42         {``, ``},
  43         {`{"abc":123}`, json},
  44         {`[` + lf + ` {"abc":123}`, json},
  45         {`[` + lf + `  {"abc":123}`, json},
  46         {`[` + crlf + tab + `{"abc":123}`, json},
  47 
  48         {``, ``},
  49         {`<?xml?>`, xml},
  50         {`<?xml?><records>`, xml},
  51         {`<?xml?>` + lf + `<records>`, xml},
  52         {`<?xml?><svg>`, svg},
  53         {`<?xml?>` + crlf + `<svg>`, svg},
  54         {xmlIntro + lf + `<svg`, svg},
  55         {xmlIntro + crlf + `<svg`, svg},
  56     }
  57 
  58     for _, tc := range tests {
  59         t.Run(tc.Input, func(t *testing.T) {
  60             res, _ := checkDoc([]byte(tc.Input))
  61             if res != tc.Expected {
  62                 t.Fatalf(`got %v, expected %v instead`, res, tc.Expected)
  63             }
  64         })
  65     }
  66 }
  67 
  68 func TestHasPrefixPattern(t *testing.T) {
  69     var (
  70         data = []byte{
  71             'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ',
  72         }
  73         pat = []byte{
  74             'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ',
  75         }
  76     )
  77 
  78     if !hasPrefixPattern(data, pat, cba) {
  79         t.Fatal(`wildcard pattern not working`)
  80     }
  81 }
  82 
  83 func BenchmarkHasPrefixMatch(b *testing.B) {
  84     var (
  85         data = []byte{
  86             'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ',
  87         }
  88         pat = []byte{
  89             'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ',
  90         }
  91     )
  92 
  93     b.ReportAllocs()
  94     b.ResetTimer()
  95 
  96     for i := 0; i < b.N; i++ {
  97         if !bytes.HasPrefix(data, pat) {
  98             b.Fatal(`pattern was specifically chosen to match, but didn't`)
  99         }
 100     }
 101 }
 102 
 103 func BenchmarkHasPrefixPatternMatch(b *testing.B) {
 104     var (
 105         data = []byte{
 106             'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ',
 107         }
 108         pat = []byte{
 109             'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ',
 110         }
 111     )
 112 
 113     b.ReportAllocs()
 114     b.ResetTimer()
 115 
 116     for i := 0; i < b.N; i++ {
 117         if !hasPrefixPattern(data, pat, cba) {
 118             b.Fatal(`pattern was specifically chosen to match, but didn't`)
 119         }
 120     }
 121 }

     File: tu/fractions.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "fmt"
   7     "go/ast"
   8     "go/parser"
   9     "go/token"
  10     "io"
  11     "math"
  12     "math/big"
  13     "strconv"
  14     "strings"
  15 )
  16 
  17 func fractions(w *bufio.Writer, r io.Reader, args []string) error {
  18     for _, src := range args {
  19         expr, err := parser.ParseExpr(src)
  20         if err != nil {
  21             return err
  22         }
  23 
  24         res, err := fracEval{}.eval(expr)
  25         if err != nil {
  26             return err
  27         }
  28 
  29         w.WriteString(res.String())
  30         if err := endLine(w); err != nil {
  31             return err
  32         }
  33     }
  34     return nil
  35 }
  36 
  37 // fracConstants can't use integer values, as some are too big to fit 64 bits
  38 var fracConstants = map[string]string{
  39     `k`:    `1000`,
  40     `kilo`: `1000`,
  41     `mega`: `1000000`,
  42     `giga`: `1000000000`,
  43     `tera`: `1000000000000`,
  44     `exa`:  `1000000000000000`,
  45 
  46     `kb`: `1024`,
  47     `mb`: `1048576`,
  48     `gb`: `1073741824`,
  49     `tb`: `1099511627776`,
  50     `eb`: `1125899906842624`,
  51 
  52     `kib`: `1024`,
  53     `mib`: `1048576`,
  54     `gib`: `1073741824`,
  55     `tib`: `1099511627776`,
  56     `eib`: `1125899906842624`,
  57 
  58     `mol`:  `602214076000000000000000`,
  59     `mole`: `602214076000000000000000`,
  60 }
  61 
  62 var fracUnary = map[string]func(x *big.Rat) (*big.Rat, error){
  63     `+`:           func(x *big.Rat) (*big.Rat, error) { return x, nil },
  64     `-`:           func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil },
  65     `abs`:         func(x *big.Rat) (*big.Rat, error) { return x.Abs(x), nil },
  66     `bits`:        fracBits,
  67     `cube`:        fracCube,
  68     `d`:           fracDenom,
  69     `den`:         fracDenom,
  70     `denom`:       fracDenom,
  71     `denominator`: fracDenom,
  72     `digits`:      fracDigits,
  73     `fac`:         fracFactorial,
  74     `fact`:        fracFactorial,
  75     `factorial`:   fracFactorial,
  76     `inv`:         func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil },
  77     `inverse`:     func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil },
  78     `invert`:      func(x *big.Rat) (*big.Rat, error) { return x.Inv(x), nil },
  79     `n`:           fracNumer,
  80     `neg`:         func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil },
  81     `negate`:      func(x *big.Rat) (*big.Rat, error) { return x.Neg(x), nil },
  82     `num`:         fracNumer,
  83     `numer`:       fracNumer,
  84     `numerator`:   fracNumer,
  85     `pow2`:        fracPower2,
  86     `power2`:      fracPower2,
  87     `pow10`:       fracPower10,
  88     `power10`:     fracPower10,
  89     `sgn`:         fracSign,
  90     `sign`:        fracSign,
  91     `sqr`:         func(x *big.Rat) (*big.Rat, error) { return x.Mul(x, x), nil },
  92     `square`:      func(x *big.Rat) (*big.Rat, error) { return x.Mul(x, x), nil },
  93 }
  94 
  95 var fracBinary = map[string]func(x, y *big.Rat) (*big.Rat, error){
  96     `+`: func(x, y *big.Rat) (*big.Rat, error) { return x.Add(x, y), nil },
  97     `-`: func(x, y *big.Rat) (*big.Rat, error) { return x.Sub(x, y), nil },
  98     `*`: func(x, y *big.Rat) (*big.Rat, error) { return x.Mul(x, y), nil },
  99     `/`: fracDiv,
 100     `%`: fracRem,
 101 
 102     `c`:            fracChoose,
 103     `choose`:       fracChoose,
 104     `comb`:         fracChoose,
 105     `combin`:       fracChoose,
 106     `combinations`: fracChoose,
 107     `p`:            fracPermute,
 108     `per`:          fracPermute,
 109     `perm`:         fracPermute,
 110     `permut`:       fracPermute,
 111     `permuts`:      fracPermute,
 112     `permutations`: fracPermute,
 113     `pow`:          fracPower,
 114     `power`:        fracPower,
 115 }
 116 
 117 func fracNumer(x *big.Rat) (*big.Rat, error) {
 118     return big.NewRat(0, 1).SetFrac(x.Num(), big.NewInt(1)), nil
 119 }
 120 
 121 func fracDenom(x *big.Rat) (*big.Rat, error) {
 122     return big.NewRat(0, 1).SetFrac(x.Denom(), big.NewInt(1)), nil
 123 }
 124 
 125 func fracBits(x *big.Rat) (*big.Rat, error) {
 126     if !x.IsInt() || x.Sign() < 0 {
 127         return nil, fmt.Errorf(`only non-negative integers are allowed`)
 128     }
 129     if x.Sign() == 0 {
 130         return big.NewRat(1, 1), nil
 131     }
 132     return big.NewRat(int64(x.Num().BitLen()), 1), nil
 133 }
 134 
 135 func fracCube(x *big.Rat) (*big.Rat, error) {
 136     cube := big.NewRat(1, 1)
 137     cube = cube.Mul(cube, x)
 138     cube = cube.Mul(cube, x)
 139     cube = cube.Mul(cube, x)
 140     return cube, nil
 141 }
 142 
 143 func fracDigits(x *big.Rat) (*big.Rat, error) {
 144     if !x.IsInt() || x.Sign() < 0 {
 145         return nil, fmt.Errorf(`only non-negative integers are allowed`)
 146     }
 147     if x.Sign() == 0 {
 148         return big.NewRat(1, 1), nil
 149     }
 150     bits := int64(x.Num().BitLen())
 151     d := int64(math.Ceil(math.Log10(math.Exp2(float64(bits)))))
 152     return big.NewRat(d, 1), nil
 153 }
 154 
 155 func fracDiv(x, y *big.Rat) (*big.Rat, error) {
 156     if y.Sign() == 0 {
 157         return nil, errors.New(`can't divide by zero`)
 158     }
 159     return x.Quo(x, y), nil
 160 }
 161 
 162 func fracRem(x, y *big.Rat) (*big.Rat, error) {
 163     if y.Sign() == 0 {
 164         return nil, errors.New(`can't divide by zero`)
 165     }
 166 
 167     if !x.IsInt() || !y.IsInt() {
 168         return nil, errors.New(`remainder only supports integers`)
 169     }
 170 
 171     rem := big.NewInt(1)
 172     rem = rem.Rem(x.Num(), y.Num())
 173     return big.NewRat(0, 1).SetFrac(rem, big.NewInt(1)), nil
 174 }
 175 
 176 func fracSign(x *big.Rat) (*big.Rat, error) {
 177     return big.NewRat(int64(x.Sign()), 1), nil
 178 }
 179 
 180 func fracFactorial(x *big.Rat) (*big.Rat, error) {
 181     if x.Sign() < 0 {
 182         return nil, errors.New(`factorial isn't defined for negative numbers`)
 183     }
 184 
 185     if !x.IsInt() {
 186         return nil, errors.New(`factorial is only defined for integer numbers`)
 187     }
 188 
 189     return fracIntFactorial(x.Num()), nil
 190 }
 191 
 192 func fracIntFactorial(n *big.Int) *big.Rat {
 193     one := big.NewInt(1)
 194     fac := big.NewInt(1)
 195     for n.Sign() > 0 {
 196         fac = fac.Mul(fac, n)
 197         n = n.Sub(n, one)
 198     }
 199     return big.NewRat(0, 1).SetFrac(fac, big.NewInt(1))
 200 }
 201 
 202 func fracChoose(n, k *big.Rat) (*big.Rat, error) {
 203     v, err := fracPermute(n, k)
 204     if err != nil {
 205         return v, err
 206     }
 207 
 208     f, err := fracFactorial(k)
 209     if err != nil {
 210         return nil, err
 211     }
 212 
 213     if f.Sign() == 0 {
 214         return nil, fmt.Errorf(`factorial mistakenly resulted in 0`)
 215     }
 216     return v.Quo(v, f), nil
 217 }
 218 
 219 func fracPermute(n, k *big.Rat) (*big.Rat, error) {
 220     if !n.IsInt() || n.Sign() < 0 || !k.IsInt() || k.Sign() < 0 {
 221         return nil, fmt.Errorf(`expected 2 non-negative integers`)
 222     }
 223 
 224     one := big.NewRat(1, 1)
 225     perm := big.NewRat(1, 1)
 226     // end = n - k + 1
 227     end := big.NewRat(1, 1).Set(n)
 228     end = end.Sub(end, k)
 229     end = end.Add(end, one)
 230 
 231     for v := big.NewRat(1, 1).Set(n); v.Cmp(end) >= 0; v = v.Sub(v, one) {
 232         perm = perm.Mul(perm, v)
 233     }
 234     return perm, nil
 235 }
 236 
 237 func fracPower2(x *big.Rat) (*big.Rat, error) {
 238     return fracPower(big.NewRat(2, 1), x)
 239 }
 240 
 241 func fracPower10(x *big.Rat) (*big.Rat, error) {
 242     return fracPower(big.NewRat(10, 1), x)
 243 }
 244 
 245 func fracPower(x *big.Rat, y *big.Rat) (*big.Rat, error) {
 246     if !y.IsInt() {
 247         return nil, errors.New(`only integer exponents are supported`)
 248     }
 249     return fracIntPower(x, y.Num())
 250 }
 251 
 252 func fracIntPower(x *big.Rat, y *big.Int) (*big.Rat, error) {
 253     if x.Sign() == 0 && y.Sign() == 0 {
 254         return nil, errors.New(`zero to the zero power isn't defined`)
 255     }
 256 
 257     if x.Sign() == 0 {
 258         return big.NewRat(0, 1), nil
 259     }
 260     if y.Sign() == 0 {
 261         return big.NewRat(1, 1), nil
 262     }
 263 
 264     return powFractionInPlace(x, y)
 265 }
 266 
 267 // powFractionInPlace calculates values in place: since bignums are pointers
 268 // to their representations, this means the original values will change
 269 func powFractionInPlace(x *big.Rat, y *big.Int) (*big.Rat, error) {
 270     xsign := x.Sign()
 271     ysign := y.Sign()
 272 
 273     // 0 ** 0 is undefined
 274     if xsign == 0 && ysign == 0 {
 275         const msg = `0 to the 0 doesn't make sense`
 276         return nil, errors.New(msg)
 277     }
 278 
 279     // otherwise x ** 0 is 1
 280     if ysign == 0 {
 281         return big.NewRat(1, 1), nil
 282     }
 283 
 284     // x ** (y < 0) is like (1/x) ** -y
 285     if ysign < 0 {
 286         inv := big.NewRat(1, 1).Inv(x)
 287         neg := big.NewInt(1).Neg(y)
 288         return powFractionInPlace(inv, neg)
 289     }
 290 
 291     // 0 ** (y > 0) is 0
 292     if xsign == 0 {
 293         return x, nil
 294     }
 295 
 296     // x ** 0 is 0
 297     if ysign == 0 {
 298         return big.NewRat(0, 1), nil
 299     }
 300 
 301     // x ** 1 is x
 302     if y.IsInt64() && y.Int64() == 1 {
 303         return x, nil
 304     }
 305 
 306     return _powFractionRec(x, y), nil
 307 }
 308 
 309 func _powFractionRec(x *big.Rat, y *big.Int) *big.Rat {
 310     switch y.Sign() {
 311     case -1:
 312         return big.NewRat(0, 1)
 313     case 0:
 314         return big.NewRat(1, 1)
 315     case 1:
 316         if y.IsInt64() && y.Int64() == 1 {
 317             return x
 318         }
 319     }
 320 
 321     yhalf := big.NewInt(0)
 322     oddrem := big.NewInt(0)
 323     yhalf.QuoRem(y, big.NewInt(2), oddrem)
 324 
 325     if oddrem.Sign() == 0 {
 326         xsquare := big.NewRat(0, 1)
 327         return _powFractionRec(xsquare.Mul(x, x), yhalf)
 328     }
 329     prevpow := _powFractionRec(x, y.Sub(y, big.NewInt(1)))
 330     return prevpow.Mul(prevpow, x)
 331 }
 332 
 333 // fracEval is just an excuse to group all sorts of funcs to eval fractions
 334 type fracEval struct{}
 335 
 336 func (fe fracEval) eval(expr ast.Expr) (*big.Rat, error) {
 337     switch expr := expr.(type) {
 338     case *ast.BasicLit:
 339         v, err := fe.literal(expr)
 340         return v, fe.context(expr.Value, err)
 341     case *ast.ParenExpr:
 342         return fe.eval(expr.X)
 343     case *ast.Ident:
 344         v, err := fe.constant(expr)
 345         return v, fe.context(expr.Name, err)
 346     case *ast.UnaryExpr:
 347         v, err := fe.unary(expr)
 348         return v, fe.context(expr.Op.String(), err)
 349     case *ast.BinaryExpr:
 350         v, err := fe.binary(expr)
 351         return v, fe.context(expr.Op.String(), err)
 352     case *ast.SelectorExpr:
 353         v, err := fe.selector(expr)
 354         return v, fe.context(expr.Sel.Name, err)
 355     case *ast.CallExpr:
 356         return fe.call(expr)
 357     default:
 358         return nil, fmt.Errorf(`unsupported expression type %T`, expr)
 359     }
 360 }
 361 
 362 func (fe fracEval) context(s string, err error) error {
 363     if err == nil {
 364         return nil
 365     }
 366     return fmt.Errorf(`%s: %w`, s, err)
 367 }
 368 
 369 func (fe fracEval) normalize(s string) string {
 370     s = strings.TrimSpace(s)
 371     s = strings.ReplaceAll(s, `_`, ``)
 372     s = strings.ToLower(s)
 373     return s
 374 }
 375 
 376 func (fe fracEval) constant(expr *ast.Ident) (*big.Rat, error) {
 377     k := fe.normalize(expr.Name)
 378     if len(k) == 0 {
 379         return nil, errors.New(`empty-name value`)
 380     }
 381 
 382     if s, ok := fracConstants[k]; ok {
 383         if num, ok := big.NewInt(0).SetString(s, 10); ok {
 384             return big.NewRat(0, 1).SetFrac(num, big.NewInt(1)), nil
 385         }
 386         return nil, errors.New(`integer literal not set (?)`)
 387     }
 388     return nil, errors.New(`value named ` + k + ` not found`)
 389 }
 390 
 391 func (fe fracEval) literal(expr *ast.BasicLit) (*big.Rat, error) {
 392     switch expr.Kind {
 393     case token.INT:
 394         s := strings.ReplaceAll(expr.Value, `_`, ``)
 395         if num, ok := big.NewInt(0).SetString(s, 10); ok {
 396             return big.NewRat(0, 1).SetFrac(num, big.NewInt(1)), nil
 397         }
 398         return nil, errors.New(`integer literal not set (?)`)
 399 
 400     case token.FLOAT:
 401         return fe.parse(expr.Value)
 402 
 403     case token.IMAG:
 404         return fe.parse(expr.Value)
 405 
 406     default:
 407         return nil, fmt.Errorf(`unknown literal kind %s`, expr.Kind.String())
 408     }
 409 }
 410 
 411 func (fe fracEval) parse(s string) (*big.Rat, error) {
 412     s = strings.TrimSuffix(s, `i`)
 413     s = strings.ReplaceAll(s, `_`, ``)
 414     den := math.Pow10(countDecimals(s))
 415     s = strings.Replace(s, `.`, ``, 1)
 416     f, err := strconv.ParseFloat(s, 64)
 417     if err != nil {
 418         return nil, err
 419     }
 420     return big.NewRat(int64(f), int64(den)), nil
 421 }
 422 
 423 func (fe fracEval) unary(expr *ast.UnaryExpr) (*big.Rat, error) {
 424     return fe.call1(expr.Op.String(), expr.X)
 425 }
 426 
 427 func (fe fracEval) binary(expr *ast.BinaryExpr) (*big.Rat, error) {
 428     return fe.call2(expr.Op.String(), expr.X, expr.Y)
 429 }
 430 
 431 func (fe fracEval) selector(expr *ast.SelectorExpr) (*big.Rat, error) {
 432     return fe.call1(expr.Sel.Name, expr.X)
 433 }
 434 
 435 func (fe fracEval) call(expr *ast.CallExpr) (*big.Rat, error) {
 436     switch len(expr.Args) {
 437     case 1:
 438         switch f := expr.Fun.(type) {
 439         case *ast.Ident:
 440             v, err := fe.call1(f.Name, expr.Args[0])
 441             return v, fe.context(f.Name, err)
 442         case *ast.SelectorExpr:
 443             v, err := fe.call2(f.Sel.Name, f.X, expr.Args[0])
 444             return v, fe.context(f.Sel.Name, err)
 445         default:
 446             return nil, fmt.Errorf(`unsupported expression type %T`, f)
 447         }
 448 
 449     case 2:
 450         switch f := expr.Fun.(type) {
 451         case *ast.Ident:
 452             v, err := fe.call2(f.Name, expr.Args[0], expr.Args[1])
 453             return v, fe.context(f.Name, err)
 454         default:
 455             return nil, fmt.Errorf(`unsupported expression type %T`, f)
 456         }
 457 
 458     default:
 459         var name string
 460         switch f := expr.Fun.(type) {
 461         case *ast.Ident:
 462             name = f.Name
 463         case *ast.SelectorExpr:
 464             name = f.Sel.Name
 465         default:
 466             return nil, fmt.Errorf(`unsupported expression type %T`, f)
 467         }
 468 
 469         const msg = `only functions with 1 or 2 arguments are supported`
 470         return nil, fe.context(name, errors.New(msg))
 471     }
 472 }
 473 
 474 func (fe fracEval) call1(name string, arg ast.Expr) (*big.Rat, error) {
 475     x, err := fe.eval(arg)
 476     if err != nil {
 477         return nil, err
 478     }
 479 
 480     k := fe.normalize(name)
 481     if len(k) == 0 {
 482         return nil, errors.New(`empty-name function`)
 483     }
 484 
 485     if f, ok := fracUnary[k]; ok {
 486         return f(x)
 487     }
 488     return nil, errors.New(`function named ` + k + ` not found`)
 489 }
 490 
 491 func (fe fracEval) call2(name string, arg1, arg2 ast.Expr) (*big.Rat, error) {
 492     x, err := fe.eval(arg1)
 493     if err != nil {
 494         return nil, err
 495     }
 496     y, err := fe.eval(arg2)
 497     if err != nil {
 498         return nil, err
 499     }
 500 
 501     k := fe.normalize(name)
 502     if len(k) == 0 {
 503         return nil, errors.New(`empty-name function`)
 504     }
 505 
 506     if f, ok := fracBinary[k]; ok {
 507         return f(x, y)
 508     }
 509     return nil, errors.New(`function named ` + k + ` not found`)
 510 }

     File: tu/go.mod
   1 module tt
   2 
   3 go 1.18

     File: tu/id3pic.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "encoding/binary"
   7     "errors"
   8     "io"
   9     "mime"
  10 )
  11 
  12 // id3Picture isolates the thumbnail bytes from the id3/mp3 stream given
  13 func id3Picture(w *bufio.Writer, r io.Reader) error {
  14     _, err := pickID3Picture(w, r)
  15     if err == io.EOF {
  16         return errors.New(`no thumbnail found`)
  17     }
  18     return err
  19 }
  20 
  21 // pickID3Picture isolates the thumbnail bytes from the id3/mp3 stream given,
  22 // also returning the detected MIME-type
  23 func pickID3Picture(w io.Writer, r io.Reader) (mimetype string, err error) {
  24     // http://www.unixgods.org/Ruby/ID3/docs/ID3_comparison.html
  25 
  26     br := bufio.NewReader(r)
  27 
  28     for {
  29         b, err := br.ReadByte()
  30         if err != nil {
  31             return ``, err
  32         }
  33 
  34         switch b {
  35         case 'A':
  36             // check for an `APIC` section
  37             ok, err := matchBytes(br, []byte{'P', 'I', 'C'})
  38             if err != nil {
  39                 return ``, err
  40             }
  41             if ok {
  42                 return handleAPIC(w, br)
  43             }
  44 
  45         case 'P':
  46             // check for a `PIC` section
  47             ok, err := matchBytes(br, []byte{'I', 'C'})
  48             if err != nil {
  49                 return ``, err
  50             }
  51             if ok {
  52                 return handlePIC(w, br)
  53             }
  54         }
  55     }
  56 }
  57 
  58 // matchBytes is used by func id3Picture to skip right past the byte-sequence
  59 // given
  60 func matchBytes(br *bufio.Reader, data []byte) (bool, error) {
  61     cur := data[:]
  62 
  63     for {
  64         if len(cur) == 0 {
  65             return true, nil
  66         }
  67 
  68         b, err := br.ReadByte()
  69         if err != nil {
  70             return false, err
  71         }
  72 
  73         if b != cur[0] {
  74             err = br.UnreadByte()
  75             return false, err
  76         }
  77 
  78         cur = cur[1:]
  79     }
  80 }
  81 
  82 // handleAPIC is used by func id3Picture
  83 func handleAPIC(w io.Writer, br *bufio.Reader) (mimeType string, err error) {
  84     // section-size seems stored as 4 little-endian bytes
  85     var size uint32
  86     err = binary.Read(br, binary.LittleEndian, &size)
  87     if err != nil {
  88         const msg = `failed to detect thumbnail-payload size`
  89         return ``, errors.New(msg)
  90     }
  91 
  92     kind, n, err := getThumbnailTypeAPIC(br)
  93     if err != nil {
  94         const msg = `failed to sync to start of thumbnail data`
  95         return ``, errors.New(msg)
  96     }
  97 
  98     mimeType = string(kind)
  99     size -= uint32(n)
 100 
 101     for {
 102         b, err := br.ReadByte()
 103         if err != nil {
 104             const msg = `failed to sync to comment before thumbnail`
 105             return mimeType, errors.New(msg)
 106         }
 107         size--
 108 
 109         if b == 0 {
 110             // some podcasts add an extra null-terminated `image` tag, right
 111             // after misidentifying the MIME-type of the picture to follow
 112             junk := []byte{'i', 'm', 'a', 'g', 'e', 0}
 113             extra, err := br.Peek(len(junk))
 114             if err != nil {
 115                 return mimeType, err
 116             }
 117             if bytes.Equal(extra, junk) {
 118                 br.Discard(len(junk))
 119             }
 120 
 121             _, err = io.Copy(w, io.LimitReader(br, int64(size)))
 122             return mimeType, adaptWriteError(err)
 123         }
 124     }
 125 }
 126 
 127 // handlePIC is used by func id3Picture
 128 func handlePIC(w io.Writer, br *bufio.Reader) (mimeType string, err error) {
 129     // http://www.unixgods.org/Ruby/ID3/docs/id3v2-00.html#PIC
 130 
 131     var buf [8]byte
 132     n, err := br.Read(buf[:3])
 133     if err != nil || n != 3 {
 134         const msg = `failed to detect thumbnail-payload size`
 135         return ``, errors.New(msg)
 136     }
 137 
 138     // thumbnail-payload-size seems stored as 3 big-endian bytes
 139     var size uint32
 140     size += 256 * 256 * uint32(buf[0])
 141     size += 256 * uint32(buf[1])
 142     size += uint32(buf[2])
 143 
 144     // skip the text encoding
 145     n, err = br.Read(buf[:5])
 146     if err != nil || n != 5 {
 147         const msg = `failed to read thumbnail-payload type`
 148         return ``, errors.New(msg)
 149     }
 150 
 151     // skip a null-delimited string
 152     _, err = br.ReadString(0)
 153     if err != nil {
 154         const msg = `failed to read thumbnail-payload description`
 155         return ``, errors.New(msg)
 156     }
 157 
 158     var ext [4]byte
 159     ext[0] = '.'
 160     ext[1] = buf[1]
 161     ext[2] = buf[2]
 162     ext[3] = buf[3]
 163 
 164     // use made-up file-extension to detect MIME-type, then copy all
 165     // thumbnail bytes
 166     mimeType = mime.TypeByExtension(string(ext[:]))
 167     _, err = io.Copy(w, io.LimitReader(br, int64(size)))
 168     return mimeType, adaptWriteError(err)
 169 }
 170 
 171 // getThumbnailTypeAPIC is used by func handleAPIC
 172 func getThumbnailTypeAPIC(br *bufio.Reader) ([]byte, int, error) {
 173     var kind []byte
 174     n, err := meetBytes(br, []byte(`image/`))
 175     if err != nil {
 176         return nil, n, err
 177     }
 178 
 179     kind = append(kind, `image/`...)
 180     for {
 181         b, err := br.ReadByte()
 182         if err != nil {
 183             return kind, n, err
 184         }
 185         n++
 186 
 187         if b == 0 {
 188             return kind, n, nil
 189         }
 190         kind = append(kind, b)
 191     }
 192 }
 193 
 194 // meetBytes is used by func getThumbnailTypeAPIC to skip right past the
 195 // byte-sequence given
 196 func meetBytes(br *bufio.Reader, data []byte) (int, error) {
 197     n := 0
 198     cur := data[:]
 199 
 200     for {
 201         if len(cur) == 0 {
 202             return n, nil
 203         }
 204 
 205         b, err := br.ReadByte()
 206         if err != nil {
 207             return n, err
 208         }
 209         n++
 210 
 211         if b == cur[0] {
 212             cur = cur[1:]
 213         } else {
 214             cur = data
 215         }
 216     }
 217 }

     File: tu/info.txt
   1 tu [tool...] [arguments...]
   2 
   3 Tiny and Useful tools is a collection of various command-line tools: most of
   4 these are line-oriented text-processors, but others even work with non-text
   5 bytes in general.
   6 
   7 Tool names have various aliases, and you can use any dashes/minuses and/or
   8 underscores freely in the names, since they're ignored during tool look-up;
   9 the only exceptions to that are `--` and `-`, which are hard-coded aliases
  10 for the `compose` tool.
  11 
  12 Tool `lines` is one of the exceptions, and allows any number of filepaths,
  13 with a single dash meaning standard-input. It even allows using single
  14 dashes multiple times: in that case, standard-input is only read once and
  15 kept for later (re)uses.

     File: tu/io.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "encoding/csv"
   7     "errors"
   8     "fmt"
   9     "io"
  10     "os"
  11     "strconv"
  12 )
  13 
  14 const (
  15     kb = 1024
  16     mb = 1024 * kb
  17     gb = 1024 * mb
  18 
  19     bufferSize = 16 * kb
  20 )
  21 
  22 // utf8BOM is used by older windows apps, but it's useless, as UTF-8 has
  23 // only 1 guaranteed byte-order
  24 var utf8BOM = []byte{0xef, 0xbb, 0xbf}
  25 
  26 // errNoMoreOutput isn't meant to ever show, and is just an excuse to quickly
  27 // quit the app successfully
  28 var errNoMoreOutput = errors.New(`no more output`)
  29 
  30 func loopCSV(r io.Reader, fn func(row []string) error) error {
  31     rr := newReaderCSV(r)
  32     rr.ReuseRecord = true
  33 
  34     for {
  35         row, err := rr.Read()
  36         if err == io.EOF {
  37             return nil
  38         }
  39 
  40         if err != nil {
  41             return err
  42         }
  43 
  44         if err := fn(row); err != nil {
  45             return err
  46         }
  47     }
  48 }
  49 
  50 type chunkHandler func(pos int, chunk []byte) (keepGoing bool)
  51 
  52 func loopChunksBackward(rs io.ReadSeeker, buf []byte, fn chunkHandler) error {
  53     if len(buf) == 0 {
  54         return errors.New(`internal error: read-buffer given is empty`)
  55     }
  56 
  57     pos, err := rs.Seek(0, io.SeekEnd)
  58     if err == io.EOF {
  59         return nil
  60     }
  61     if err != nil {
  62         return err
  63     }
  64 
  65     if pos < 1 {
  66         fn(0, nil)
  67         return nil
  68     }
  69 
  70     // first (backward-chunk) read may be smaller than the others
  71     chunkSize := pos % int64(len(buf))
  72     if chunkSize == 0 {
  73         chunkSize = int64(len(buf))
  74     }
  75 
  76     for {
  77         where := pos - chunkSize
  78         if where < 0 {
  79             where = 0
  80         }
  81 
  82         // ensure full-buffer reads for all later (backward-chunk) reads
  83         chunkSize = int64(len(buf))
  84 
  85         pos, err = rs.Seek(where, io.SeekStart)
  86         if err != nil {
  87             return err
  88         }
  89 
  90         read, err := rs.Read(buf[:chunkSize])
  91         if err == io.EOF {
  92             return nil
  93         }
  94         if err != nil {
  95             return err
  96         }
  97 
  98         if !fn(int(pos), buf[:read]) {
  99             _, err = rs.Seek(pos, io.SeekStart)
 100             return err
 101         }
 102 
 103         if where < 1 {
 104             _, err = rs.Seek(0, io.SeekStart)
 105             return err
 106         }
 107     }
 108 }
 109 
 110 func loopLines(r io.Reader, fn func(line []byte) error) error {
 111     sc := bufio.NewScanner(r)
 112     sc.Buffer(nil, 8*gb)
 113 
 114     for i := 0; sc.Scan(); i++ {
 115         s := sc.Bytes()
 116         if i == 0 {
 117             s = bytes.TrimPrefix(s, utf8BOM)
 118         }
 119 
 120         if err := fn(s); err != nil {
 121             return err
 122         }
 123     }
 124 
 125     return sc.Err()
 126 }
 127 
 128 func loopLinesString(r io.Reader, fn func(line string) error) error {
 129     sc := bufio.NewScanner(r)
 130     sc.Buffer(nil, 8*gb)
 131 
 132     for i := 0; sc.Scan(); i++ {
 133         s := sc.Text()
 134         if i == 0 && startsWith(s, utf8BOM) {
 135             s = s[len(utf8BOM):]
 136         }
 137 
 138         if err := fn(s); err != nil {
 139             return err
 140         }
 141     }
 142 
 143     return sc.Err()
 144 }
 145 
 146 func loadSetPair(paths []string, r io.Reader) ([]string, []string, error) {
 147     if len(paths) == 1 {
 148         paths = []string{paths[0], `-`}
 149     }
 150     if len(paths) != 2 {
 151         return nil, nil, fmt.Errorf(`expected 2 args, but was given %d`, len(paths))
 152     }
 153 
 154     if paths[0] == `-` && paths[1] == `-` {
 155         u, err := loadUniqueLines(r)
 156         return u, u, err
 157     }
 158 
 159     var x, y []string
 160 
 161     err := handleNamedInput(paths[0], r, func(r io.Reader) error {
 162         u, err := loadUniqueLines(r)
 163         x = u
 164         return err
 165     })
 166 
 167     if err != nil {
 168         return x, y, err
 169     }
 170 
 171     err = handleNamedInput(paths[1], r, func(r io.Reader) error {
 172         u, err := loadUniqueLines(r)
 173         y = u
 174         return err
 175     })
 176 
 177     return x, y, err
 178 }
 179 
 180 func loadUniqueLines(r io.Reader) ([]string, error) {
 181     var lines []string
 182     got := make(map[string]struct{})
 183 
 184     err := loopLines(r, func(line []byte) error {
 185         s := string(line)
 186         if _, ok := got[s]; ok {
 187             return nil
 188         }
 189 
 190         got[s] = struct{}{}
 191         lines = append(lines, s)
 192         return nil
 193     })
 194 
 195     return lines, err
 196 }
 197 
 198 type namedInputHandler func(path string, r io.Reader) error
 199 
 200 func handleNamedInputs(paths []string, r io.Reader, handle namedInputHandler) error {
 201     if len(paths) == 0 {
 202         return handle(`-`, os.Stdin)
 203     }
 204 
 205     // find if single-dash was given more than once
 206     dashes := 0
 207     reuseStdin := false
 208     for _, s := range paths {
 209         if s == `-` {
 210             dashes++
 211         }
 212         if dashes > 1 {
 213             // counting 2 dashes is enough
 214             reuseStdin = true
 215             break
 216         }
 217     }
 218 
 219     var stdin []byte
 220     gotStdin := false
 221 
 222     for _, s := range paths {
 223         if s == `-` {
 224             // handle multiple single-dashes by slurping stdin the first
 225             // time, then `replaying` for every `dash`
 226             if reuseStdin {
 227                 if !gotStdin {
 228                     gotStdin = true
 229                     b, err := io.ReadAll(os.Stdin)
 230                     if err != nil {
 231                         return err
 232                     }
 233                     stdin = b
 234                 }
 235 
 236                 if err := handle(s, bytes.NewReader(stdin)); err != nil {
 237                     return err
 238                 }
 239                 continue
 240             }
 241 
 242             // no need to slurp stdin, if it's only being used once
 243             if err := handle(s, r); err != nil {
 244                 return err
 245             }
 246             continue
 247         }
 248 
 249         // handle actual files
 250         err := handleFile(s, func(r io.Reader) error { return handle(s, r) })
 251         if err != nil {
 252             return err
 253         }
 254     }
 255 
 256     return nil
 257 }
 258 
 259 func handleNamedInput(path string, r io.Reader, handle func(r io.Reader) error) error {
 260     if path == `-` {
 261         return handle(r)
 262     }
 263 
 264     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
 265     //  resp, err := http.Get(path)
 266     //  if err != nil {
 267     //      return err
 268     //  }
 269     //  defer resp.Body.Close()
 270     //  return handle(resp.Body)
 271     // }
 272 
 273     return handleFile(path, handle)
 274 }
 275 
 276 func handleFile(path string, handle func(r io.Reader) error) error {
 277     f, err := os.Open(path)
 278     if err != nil {
 279         return errors.New(`can't open file named ` + path)
 280     }
 281     defer f.Close()
 282     return handle(f)
 283 }
 284 
 285 func endLine(w io.Writer) error {
 286     _, err := w.Write([]byte{'\n'})
 287     return adaptWriteError(err)
 288 }
 289 
 290 // func adaptReadError(err error) error {
 291 //  if err == nil || err == io.EOF {
 292 //      return nil
 293 //  }
 294 //  return err
 295 // }
 296 
 297 func adaptWriteError(err error) error {
 298     if err == nil {
 299         return nil
 300     }
 301     return errNoMoreOutput
 302 }
 303 
 304 func copyBytes(w io.Writer, r io.Reader) error {
 305     _, err := io.Copy(w, r)
 306     return adaptWriteError(err)
 307 }
 308 
 309 func copyLines(w *bufio.Writer, r io.Reader) error {
 310     return loopLines(r, func(line []byte) error {
 311         w.Write(line)
 312         return endLine(w)
 313     })
 314 }
 315 
 316 func writeInt64(w io.Writer, n int64) error {
 317     var buf [32]byte
 318     _, err := w.Write(strconv.AppendInt(buf[:0], n, 10))
 319     return adaptWriteError(err)
 320 }
 321 
 322 func writeLines(w *bufio.Writer, lines []string) error {
 323     for _, s := range lines {
 324         w.WriteString(s)
 325         if err := endLine(w); err != nil {
 326             return err
 327         }
 328     }
 329 
 330     return nil
 331 }
 332 
 333 func writeSpaces(w *bufio.Writer, n int) {
 334     const (
 335         spaces16 = `                `
 336         spaces32 = spaces16 + spaces16
 337         spaces   = spaces32 + spaces32
 338     )
 339 
 340     if n < 1 {
 341         return
 342     }
 343 
 344     for n >= len(spaces) {
 345         w.WriteString(spaces)
 346         n -= len(spaces)
 347     }
 348     w.WriteString(spaces[:n])
 349 }
 350 
 351 func newReaderCSV(r io.Reader) *csv.Reader {
 352     rr := csv.NewReader(r)
 353     rr.Comma = ','
 354     rr.FieldsPerRecord = -1
 355     rr.LazyQuotes = true
 356     return rr
 357 }
 358 
 359 func newWriterCSV(w io.Writer) *csv.Writer {
 360     rw := csv.NewWriter(w)
 361     rw.Comma = ','
 362     rw.UseCRLF = false
 363     return rw
 364 }
 365 
 366 // readPairFunc narrows source-code lines below
 367 type readPairFunc func(*bufio.Reader) (byte, byte, error)
 368 
 369 // readBytePairBE gets you a pair of bytes in big-endian (original) order
 370 func readBytePairBE(br *bufio.Reader) (byte, byte, error) {
 371     a, err := br.ReadByte()
 372     if err != nil {
 373         return a, 0, err
 374     }
 375     b, err := br.ReadByte()
 376     return a, b, err
 377 }
 378 
 379 // readBytePairLE gets you a pair of bytes in little-endian order
 380 func readBytePairLE(br *bufio.Reader) (byte, byte, error) {
 381     a, b, err := readBytePairBE(br)
 382     return b, a, err
 383 }

     File: tu/json0.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "errors"
   7     "io"
   8     "strconv"
   9 )
  10 
  11 // linePosError is a more descriptive kind of error, showing the source of
  12 // the input-related problem, as 1-based a line/pos number pair in front
  13 // of the error message
  14 type linePosError struct {
  15     // line is the 1-based line count from the input
  16     line int
  17 
  18     // pos is the 1-based `horizontal` position in its line
  19     pos int
  20 
  21     // err is the error message to `decorate` with the position info
  22     err error
  23 }
  24 
  25 // Error satisfies the error interface
  26 func (lpe linePosError) Error() string {
  27     where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
  28     return where + `: ` + lpe.err.Error()
  29 }
  30 
  31 var (
  32     errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
  33     errInputEarlyEnd   = errors.New(`expected end of input data`)
  34     errInvalidComment  = errors.New(`expected / or *`)
  35     errInvalidHex      = errors.New(`expected a base-16 digit`)
  36     errInvalidToken    = errors.New(`invalid JSON token`)
  37     errNoDigits        = errors.New(`expected numeric digits`)
  38     errNoStringQuote   = errors.New(`expected " or '`)
  39     errNoArrayComma    = errors.New(`missing comma between array values`)
  40     errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
  41     errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
  42     errExtraBytes      = errors.New(`unexpected extra input bytes`)
  43 
  44     // errNoMoreOutput is a generic dummy output-error, which is meant to be
  45     // ultimately ignored, being just an excuse to quit the app immediately
  46     // and successfully
  47     // errNoMoreOutput = errors.New(`no more output`)
  48 )
  49 
  50 // isIdentifier improves control-flow of func jsonReader.key, when it handles
  51 // unquoted object keys
  52 var isIdentifier = [256]bool{
  53     '_': true,
  54 
  55     '0': true, '1': true, '2': true, '3': true, '4': true,
  56     '5': true, '6': true, '7': true, '8': true, '9': true,
  57 
  58     'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
  59     'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
  60     'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
  61     'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  62     'Y': true, 'Z': true,
  63 
  64     'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
  65     'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
  66     'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
  67     's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  68     'y': true, 'z': true,
  69 }
  70 
  71 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not
  72 // being 0, and normalizes letter-case for the hex letters
  73 var matchHex = [256]byte{
  74     '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
  75     '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
  76     'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
  77     'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
  78 }
  79 
  80 // escapedStringBytes helps func stringValue treat all string bytes quickly
  81 // and correctly, using their officially-supported JSON escape sequences
  82 //
  83 // https://www.rfc-editor.org/rfc/rfc8259#section-7
  84 var escapedStringBytes = [256][]byte{
  85     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
  86     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
  87     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
  88     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
  89     {'\\', 'b'}, {'\\', 't'},
  90     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
  91     {'\\', 'f'}, {'\\', 'r'},
  92     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
  93     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
  94     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
  95     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
  96     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
  97     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
  98     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
  99     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 100     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 101     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 102     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 103     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 104     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 105     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 106     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 107     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 108     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 109     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 110     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 111     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 112     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 113     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 114     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 115     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 116     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 117     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 118     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 119     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 120     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 121     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 122     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 123     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 124     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 125     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 126     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 127     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 128     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 129 }
 130 
 131 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON, except for an
 132 // extra/single line-feed at the end of the output
 133 func json0(w *bufio.Writer, r io.Reader) error {
 134     br := bufio.NewReader(r)
 135     jr := jsonReader{br, 1, 1}
 136     if err := jr.run(w); err != nil {
 137         return err
 138     }
 139     return endLine(w)
 140 }
 141 
 142 // jsonl converts lines, each with JSON/pseudo-JSON data, into a (valid)
 143 // minimal JSON array
 144 func jsonl(w *bufio.Writer, r io.Reader) error {
 145     i := 0
 146 
 147     err := loopLines(r, func(line []byte) error {
 148         if i == 0 {
 149             w.WriteByte('[')
 150         } else {
 151             w.WriteByte(',')
 152         }
 153         i++
 154 
 155         br := bufio.NewReader(bytes.NewReader(line))
 156         jr := jsonReader{br, 1, 1}
 157         // make errors refer to the right line number
 158         jr.line = i + 1
 159         return jr.run(w)
 160     })
 161 
 162     if err != nil {
 163         return err
 164     }
 165 
 166     if i > 0 {
 167         w.WriteByte(']')
 168     }
 169     return endLine(w)
 170 }
 171 
 172 // jsonReader reads data via a buffer, keeping track of the input position:
 173 // this in turn allows showing much more useful errors, when these happen
 174 type jsonReader struct {
 175     // r is the actual reader
 176     r *bufio.Reader
 177 
 178     // line is the 1-based line-counter for input bytes, and gives errors
 179     // useful position info
 180     line int
 181 
 182     // pos is the 1-based `horizontal` position in its line, and gives
 183     // errors useful position info
 184     pos int
 185 }
 186 
 187 // improveError makes any error more useful, by giving it info about the
 188 // current input-position, as a 1-based line/within-line-position pair
 189 func (jr jsonReader) improveError(err error) error {
 190     if _, ok := err.(linePosError); ok {
 191         return err
 192     }
 193 
 194     if err == io.EOF {
 195         return linePosError{jr.line, jr.pos, errInputEarlyEnd}
 196     }
 197     if err != nil {
 198         return linePosError{jr.line, jr.pos, err}
 199     }
 200     return nil
 201 }
 202 
 203 // run does all the work for func json0, and each input line's work for func
 204 // jsonl
 205 func (jr *jsonReader) run(w *bufio.Writer) error {
 206     // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
 207     // mark) gives no useful info if present, as UTF-8 leaves no ambiguity
 208     // about byte-order by design
 209     jr.skipUTF8BOM()
 210 
 211     // ignore leading whitespace and/or comments
 212     if err := jr.seekNext(); err != nil {
 213         return err
 214     }
 215 
 216     // handle a single top-level JSON value
 217     if err := jr.value(w); err != nil {
 218         return err
 219     }
 220 
 221     // ignore trailing whitespace and/or comments
 222     if err := jr.seekNext(); err != nil {
 223         return err
 224     }
 225 
 226     // beyond trailing whitespace and/or comments, any more bytes
 227     // make the whole input data invalid JSON
 228     if _, ok := jr.peekByte(); ok {
 229         return jr.improveError(errExtraBytes)
 230     }
 231     return nil
 232 }
 233 
 234 // demandSyntax fails with an error when the next byte isn't the one given;
 235 // when it is, the byte is then read/skipped, and a nil error is returned
 236 func (jr *jsonReader) demandSyntax(syntax byte) error {
 237     chunk, err := jr.r.Peek(1)
 238     if err == io.EOF {
 239         return jr.improveError(errInputEarlyEnd)
 240     }
 241     if err != nil {
 242         return jr.improveError(err)
 243     }
 244 
 245     if len(chunk) < 1 || chunk[0] != syntax {
 246         msg := `expected ` + string(rune(syntax))
 247         return jr.improveError(errors.New(msg))
 248     }
 249 
 250     jr.readByte()
 251     return nil
 252 }
 253 
 254 // updatePosInfo does what it says, given the byte just read separately
 255 func (jr *jsonReader) updatePosInfo(b byte) {
 256     if b == '\n' {
 257         jr.line += 1
 258         jr.pos = 1
 259     } else {
 260         jr.pos++
 261     }
 262 }
 263 
 264 // peekByte simplifies control-flow for various other funcs
 265 func (jr jsonReader) peekByte() (b byte, ok bool) {
 266     chunk, err := jr.r.Peek(1)
 267     if err == nil && len(chunk) >= 1 {
 268         return chunk[0], true
 269     }
 270     return 0, false
 271 }
 272 
 273 // readByte does what it says, updating the reader's position info
 274 func (jr *jsonReader) readByte() (b byte, err error) {
 275     b, err = jr.r.ReadByte()
 276     if err == nil {
 277         jr.updatePosInfo(b)
 278         return b, nil
 279     }
 280     return b, jr.improveError(err)
 281 }
 282 
 283 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols
 284 // and comments, either single-line (starting with //) or general (starting
 285 // with /* and ending with */)
 286 func (jr *jsonReader) seekNext() error {
 287     for {
 288         b, ok := jr.peekByte()
 289         if !ok {
 290             return nil
 291         }
 292 
 293         // case ' ', '\t', '\f', '\v', '\r', '\n':
 294         if b <= 32 {
 295             // keep skipping whitespace bytes
 296             b, _ := jr.readByte()
 297             jr.updatePosInfo(b)
 298             continue
 299         }
 300 
 301         if b != '/' {
 302             // reached the next token
 303             return nil
 304         }
 305 
 306         if err := jr.skipComment(); err != nil {
 307             return err
 308         }
 309 
 310         // after comments, keep looking for more whitespace and/or comments
 311     }
 312 }
 313 
 314 // skipComment helps func seekNext skip over comments, simplifying the latter
 315 // func's control-flow
 316 func (jr *jsonReader) skipComment() error {
 317     err := jr.demandSyntax('/')
 318     if err != nil {
 319         return err
 320     }
 321 
 322     b, ok := jr.peekByte()
 323     if !ok {
 324         return jr.improveError(errInputEarlyEnd)
 325     }
 326 
 327     switch b {
 328     case '/':
 329         // handle single-line comments
 330         return jr.skipLine()
 331 
 332     case '*':
 333         // handle (potentially) multi-line comments
 334         return jr.skipGeneralComment()
 335 
 336     default:
 337         return jr.improveError(errInvalidComment)
 338     }
 339 }
 340 
 341 // skipLine handles single-line comments for func skipComment
 342 func (jr *jsonReader) skipLine() error {
 343     for {
 344         b, err := jr.r.ReadByte()
 345         if err == io.EOF {
 346             // end of input is fine in this case
 347             return nil
 348         }
 349         if err != nil {
 350             return err
 351         }
 352 
 353         jr.updatePosInfo(b)
 354         if b == '\n' {
 355             jr.line++
 356             return nil
 357         }
 358     }
 359 }
 360 
 361 // skipGeneralComment handles (potentially) multi-line comments for func
 362 // skipComment
 363 func (jr *jsonReader) skipGeneralComment() error {
 364     var prev byte
 365     for {
 366         b, err := jr.readByte()
 367         if err != nil {
 368             return jr.improveError(errCommentEarlyEnd)
 369         }
 370 
 371         if prev == '*' && b == '/' {
 372             return nil
 373         }
 374         if b == '\n' {
 375             jr.line++
 376         }
 377         prev = b
 378     }
 379 }
 380 
 381 // skipUTF8BOM does what it says, if a UTF-8 BOM is present
 382 func (jr *jsonReader) skipUTF8BOM() {
 383     lead, err := jr.r.Peek(3)
 384     if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
 385         jr.readByte()
 386         jr.readByte()
 387         jr.readByte()
 388         jr.pos += 3
 389     }
 390 }
 391 
 392 // outputByte is a small wrapper on func WriteByte, which adapts any error
 393 // into a custom dummy output-error, which is in turn meant to be ignored,
 394 // being just an excuse to quit the app immediately and successfully
 395 func outputByte(w *bufio.Writer, b byte) error {
 396     err := w.WriteByte(b)
 397     if err == nil {
 398         return nil
 399     }
 400     return errNoMoreOutput
 401 }
 402 
 403 // array handles arrays for func value
 404 func (jr *jsonReader) array(w *bufio.Writer) error {
 405     if err := jr.demandSyntax('['); err != nil {
 406         return err
 407     }
 408     w.WriteByte('[')
 409 
 410     for n := 0; true; n++ {
 411         // there may be whitespace/comments before the next comma
 412         if err := jr.seekNext(); err != nil {
 413             return err
 414         }
 415 
 416         // handle commas between values, as well as trailing ones
 417         comma := false
 418         b, _ := jr.peekByte()
 419         if b == ',' {
 420             jr.readByte()
 421             comma = true
 422 
 423             // there may be whitespace/comments before an ending ']'
 424             if err := jr.seekNext(); err != nil {
 425                 return err
 426             }
 427             b, _ = jr.peekByte()
 428         }
 429 
 430         // handle end of array
 431         if b == ']' {
 432             jr.readByte()
 433             w.WriteByte(']')
 434             return nil
 435         }
 436 
 437         // don't forget commas between adjacent values
 438         if n > 0 {
 439             if !comma {
 440                 return errNoArrayComma
 441             }
 442             if err := outputByte(w, ','); err != nil {
 443                 return err
 444             }
 445         }
 446 
 447         // handle the next value
 448         if err := jr.seekNext(); err != nil {
 449             return err
 450         }
 451         if err := jr.value(w); err != nil {
 452             return err
 453         }
 454     }
 455 
 456     // make the compiler happy
 457     return nil
 458 }
 459 
 460 // digits helps various number-handling funcs do their job
 461 func (jr *jsonReader) digits(w *bufio.Writer) error {
 462     for n := 0; true; n++ {
 463         b, _ := jr.peekByte()
 464 
 465         // support `nice` long numbers by ignoring their underscores
 466         if b == '_' {
 467             jr.readByte()
 468             continue
 469         }
 470 
 471         if '0' <= b && b <= '9' {
 472             jr.readByte()
 473             w.WriteByte(b)
 474             continue
 475         }
 476 
 477         if n == 0 {
 478             return errNoDigits
 479         }
 480         return nil
 481     }
 482 
 483     // make the compiler happy
 484     return nil
 485 }
 486 
 487 // dot handles pseudo-JSON numbers which start with a decimal dot
 488 func (jr *jsonReader) dot(w *bufio.Writer) error {
 489     if err := jr.demandSyntax('.'); err != nil {
 490         return err
 491     }
 492     w.Write([]byte{'0', '.'})
 493     return jr.digits(w)
 494 }
 495 
 496 // key is used by func object and generalizes func stringValue, by allowing
 497 // unquoted object keys; it's not used anywhere else, as allowing unquoted
 498 // string values is ambiguous with actual JSON-keyword values null, false, and
 499 // true
 500 func (jr *jsonReader) key(w *bufio.Writer) error {
 501     quote, ok := jr.peekByte()
 502     if quote == '"' || quote == '\'' {
 503         return jr.stringValue(w)
 504     }
 505     if !ok {
 506         return jr.improveError(errStringEarlyEnd)
 507     }
 508 
 509     w.WriteByte('"')
 510     for {
 511         if b, _ := jr.peekByte(); isIdentifier[b] {
 512             jr.readByte()
 513             w.WriteByte(b)
 514             continue
 515         }
 516 
 517         w.WriteByte('"')
 518         return nil
 519     }
 520 }
 521 
 522 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes
 523 // are unescaped ASCII symbols: this is a very common case for strings, and is
 524 // almost always the case for object keys; returns whether it succeeded, so
 525 // this func's caller knows knows if it needs to do anything, the slower way
 526 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
 527     chunk, _ := jr.r.Peek(64)
 528 
 529     for i, b := range chunk {
 530         if b < 32 || b > 127 || b == '\\' {
 531             return false
 532         }
 533         if b != quote {
 534             continue
 535         }
 536 
 537         // bulk-writing the chunk is this func's whole point
 538         w.WriteByte('"')
 539         w.Write(chunk[:i])
 540         w.WriteByte('"')
 541 
 542         jr.r.Discard(i + 1)
 543         return true
 544     }
 545 
 546     // maybe the inner-string is ok, but it's just longer than the chunk
 547     return false
 548 }
 549 
 550 // keyword demands the exact keyword/string given to it
 551 func (jr *jsonReader) keyword(w *bufio.Writer, kw []byte) error {
 552     for rest := kw; len(rest) > 0; rest = rest[1:] {
 553         b, err := jr.readByte()
 554         if err == nil && b == rest[0] {
 555             // keywords given to this func have no line-feeds
 556             jr.pos++
 557             continue
 558         }
 559 
 560         msg := `expected JSON value ` + string(kw)
 561         return jr.improveError(errors.New(msg))
 562     }
 563 
 564     w.Write(kw)
 565     return nil
 566 }
 567 
 568 // negative handles numbers starting with a negative sign for func value
 569 func (jr *jsonReader) negative(w *bufio.Writer) error {
 570     if err := jr.demandSyntax('-'); err != nil {
 571         return err
 572     }
 573 
 574     w.WriteByte('-')
 575     if b, _ := jr.peekByte(); b == '.' {
 576         jr.readByte()
 577         w.Write([]byte{'0', '.'})
 578         return jr.digits(w)
 579     }
 580     return jr.number(w)
 581 }
 582 
 583 // number handles numeric values/tokens, including invalid-JSON cases, such
 584 // as values starting with a decimal dot
 585 func (jr *jsonReader) number(w *bufio.Writer) error {
 586     // handle integer digits
 587     if err := jr.digits(w); err != nil {
 588         return err
 589     }
 590 
 591     // handle optional decimal digits, starting with a leading dot
 592     if b, _ := jr.peekByte(); b == '.' {
 593         jr.readByte()
 594         w.WriteByte('.')
 595         return jr.digits(w)
 596     }
 597     return nil
 598 }
 599 
 600 // object handles objects for func value
 601 func (jr *jsonReader) object(w *bufio.Writer) error {
 602     if err := jr.demandSyntax('{'); err != nil {
 603         return err
 604     }
 605     w.WriteByte('{')
 606 
 607     for npairs := 0; true; npairs++ {
 608         // there may be whitespace/comments before the next comma
 609         if err := jr.seekNext(); err != nil {
 610             return err
 611         }
 612 
 613         // handle commas between key-value pairs, as well as trailing ones
 614         comma := false
 615         b, _ := jr.peekByte()
 616         if b == ',' {
 617             jr.readByte()
 618             comma = true
 619 
 620             // there may be whitespace/comments before an ending '}'
 621             if err := jr.seekNext(); err != nil {
 622                 return err
 623             }
 624             b, _ = jr.peekByte()
 625         }
 626 
 627         // handle end of object
 628         if b == '}' {
 629             jr.readByte()
 630             w.WriteByte('}')
 631             return nil
 632         }
 633 
 634         // don't forget commas between adjacent key-value pairs
 635         if npairs > 0 {
 636             if !comma {
 637                 return errNoObjectComma
 638             }
 639             if err := outputByte(w, ','); err != nil {
 640                 return err
 641             }
 642         }
 643 
 644         // handle the next pair's key
 645         if err := jr.seekNext(); err != nil {
 646             return err
 647         }
 648         if err := jr.key(w); err != nil {
 649             return err
 650         }
 651 
 652         // demand a colon right after the key
 653         if err := jr.seekNext(); err != nil {
 654             return err
 655         }
 656         if err := jr.demandSyntax(':'); err != nil {
 657             return err
 658         }
 659         w.WriteByte(':')
 660 
 661         // handle the next pair's value
 662         if err := jr.seekNext(); err != nil {
 663             return err
 664         }
 665         if err := jr.value(w); err != nil {
 666             return err
 667         }
 668     }
 669 
 670     // make the compiler happy
 671     return nil
 672 }
 673 
 674 // positive handles numbers starting with a positive sign for func value
 675 func (jr *jsonReader) positive(w *bufio.Writer) error {
 676     if err := jr.demandSyntax('+'); err != nil {
 677         return err
 678     }
 679 
 680     // valid JSON isn't supposed to have leading pluses on numbers, so
 681     // emit nothing for it, unlike for negative numbers
 682 
 683     if b, _ := jr.peekByte(); b == '.' {
 684         jr.readByte()
 685         w.Write([]byte{'0', '.'})
 686         return jr.digits(w)
 687     }
 688     return jr.number(w)
 689 }
 690 
 691 // stringValue handles strings for funcs value and key, and supports both
 692 // single-quotes and double-quotes, always emitting the latter in the output,
 693 // of course
 694 func (jr *jsonReader) stringValue(w *bufio.Writer) error {
 695     quote, ok := jr.peekByte()
 696     if !ok || (quote != '"' && quote != '\'') {
 697         return errNoStringQuote
 698     }
 699 
 700     jr.readByte()
 701     // try the quicker all-unescaped-ASCII handler
 702     if trySimpleInner(w, jr, quote) {
 703         return nil
 704     }
 705 
 706     // it's a non-trivial inner-string, so handle it byte-by-byte
 707     w.WriteByte('"')
 708     escaped := false
 709 
 710     for {
 711         b, err := jr.r.ReadByte()
 712         if err != nil {
 713             if err == io.EOF {
 714                 return jr.improveError(errStringEarlyEnd)
 715             }
 716             return jr.improveError(err)
 717         }
 718 
 719         if !escaped {
 720             if b == '\\' {
 721                 escaped = true
 722                 continue
 723             }
 724 
 725             // handle end of string
 726             if b == quote {
 727                 return outputByte(w, '"')
 728             }
 729 
 730             w.Write(escapedStringBytes[b])
 731             jr.updatePosInfo(b)
 732             continue
 733         }
 734 
 735         // handle escaped items
 736         escaped = false
 737 
 738         switch b {
 739         case 'u':
 740             // \u needs exactly 4 hex-digits to follow it
 741             w.Write([]byte{'\\', 'u'})
 742             if err := copyHex(w, 4, jr); err != nil {
 743                 return jr.improveError(err)
 744             }
 745 
 746         case 'x':
 747             // JSON only supports 4 escaped hex-digits, so pad the 2
 748             // expected hex-digits with 2 zeros
 749             w.Write([]byte{'\\', 'u', '0', '0'})
 750             if err := copyHex(w, 2, jr); err != nil {
 751                 return jr.improveError(err)
 752             }
 753 
 754         case 't', 'f', 'r', 'n', 'b', '\\', '"':
 755             // handle valid-JSON escaped string sequences
 756             w.WriteByte('\\')
 757             w.WriteByte(b)
 758 
 759         // case '\'':
 760         //  // escaped single-quotes aren't standard JSON, but they can
 761         //  // be handy when the input uses non-standard single-quoted
 762         //  // strings
 763         //  w.WriteByte('\'')
 764 
 765         default:
 766             // return jr.decorateError(unexpectedByte{b})
 767             w.Write(escapedStringBytes[b])
 768         }
 769     }
 770 }
 771 
 772 // copyHex handles a run of hex-digits for func stringValue, starting right
 773 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its
 774 // errors with position info: that's up to the caller
 775 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
 776     for i := 0; i < n; i++ {
 777         b, err := jr.r.ReadByte()
 778         if err == io.EOF {
 779             return errStringEarlyEnd
 780         }
 781         if err != nil {
 782             return err
 783         }
 784 
 785         jr.updatePosInfo(b)
 786 
 787         if b := matchHex[b]; b != 0 {
 788             w.WriteByte(b)
 789             continue
 790         }
 791 
 792         return errInvalidHex
 793     }
 794 
 795     return nil
 796 }
 797 
 798 // value is a generic JSON-token/value handler, which allows the recursive
 799 // behavior to handle any kind of JSON/pseudo-JSON input
 800 func (jr *jsonReader) value(w *bufio.Writer) error {
 801     chunk, err := jr.r.Peek(1)
 802     if err == nil && len(chunk) >= 1 {
 803         return jr.dispatch(w, chunk[0])
 804     }
 805 
 806     if err == io.EOF {
 807         return jr.improveError(errInputEarlyEnd)
 808     }
 809     return jr.improveError(errInputEarlyEnd)
 810 }
 811 
 812 // dispatch simplifies control-flow for func value
 813 func (jr *jsonReader) dispatch(w *bufio.Writer, b byte) error {
 814     switch b {
 815     case 'f':
 816         return jr.keyword(w, []byte{'f', 'a', 'l', 's', 'e'})
 817     case 'n':
 818         return jr.keyword(w, []byte{'n', 'u', 'l', 'l'})
 819     case 't':
 820         return jr.keyword(w, []byte{'t', 'r', 'u', 'e'})
 821     case '.':
 822         return jr.dot(w)
 823     case '+':
 824         return jr.positive(w)
 825     case '-':
 826         return jr.negative(w)
 827     case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 828         return jr.number(w)
 829     case '\'', '"':
 830         return jr.stringValue(w)
 831     case '[':
 832         return jr.array(w)
 833     case '{':
 834         return jr.object(w)
 835     default:
 836         return jr.improveError(errInvalidToken)
 837     }
 838 }

     File: tu/json0_test.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "io"
   7     "strings"
   8     "testing"
   9 )
  10 
  11 func TestJSON0(t *testing.T) {
  12     var tests = []struct {
  13         Input    string
  14         Expected string
  15     }{
  16         {`false`, `false`},
  17         {`null`, `null`},
  18         {`  true  `, `true`},
  19 
  20         {`0`, `0`},
  21         {`1`, `1`},
  22         {`2`, `2`},
  23         {`3`, `3`},
  24         {`4`, `4`},
  25         {`5`, `5`},
  26         {`6`, `6`},
  27         {`7`, `7`},
  28         {`8`, `8`},
  29         {`9`, `9`},
  30 
  31         {`  .345`, `0.345`},
  32         {` -.345`, `-0.345`},
  33         {` +.345`, `0.345`},
  34         {` +123.345`, `123.345`},
  35         {` +.345`, `0.345`},
  36         {` 123.34523`, `123.34523`},
  37         {` 123.34_523`, `123.34523`},
  38         {` 123_456.123`, `123456.123`},
  39 
  40         {`""`, `""`},
  41         {`''`, `""`},
  42         {`"\""`, `"\""`},
  43         {`'\"'`, `"\""`},
  44         {`'\''`, `"'"`},
  45         {`'abc\u0e9A'`, `"abc\u0E9A"`},
  46         {`'abc\x1f[0m'`, `"abc\u001F[0m"`},
  47 
  48         {`[  ]`, `[]`},
  49         {`[ , ]`, `[]`},
  50         {`[.345, false,null , ]`, `[0.345,false,null]`},
  51 
  52         {`{  }`, `{}`},
  53         {`{ , }`, `{}`},
  54 
  55         {
  56             `{ 'abc': .345, "def"  : false, 'xyz':null , }`,
  57             `{"abc":0.345,"def":false,"xyz":null}`,
  58         },
  59 
  60         {`{0problems:123,}`, `{"0problems":123}`},
  61         {`{0_problems:123}`, `{"0_problems":123}`},
  62     }
  63 
  64     for _, tc := range tests {
  65         t.Run(tc.Input, func(t *testing.T) {
  66             var out strings.Builder
  67             w := bufio.NewWriter(&out)
  68             r := bufio.NewReader(strings.NewReader(tc.Input))
  69             if err := json0(w, r); err != nil && err != io.EOF {
  70                 t.Fatal(err)
  71                 return
  72             }
  73             // don't forget to flush the buffer, or output will be empty
  74             w.Flush()
  75 
  76             s := out.String()
  77             s = strings.TrimSuffix(s, "\n")
  78             if s != tc.Expected {
  79                 t.Fatalf("<got>\n%s\n<expected>\n%s", s, tc.Expected)
  80                 return
  81             }
  82         })
  83     }
  84 }
  85 
  86 func TestEscapedStringBytes(t *testing.T) {
  87     var escaped = map[rune][]byte{
  88         '\x00': {'\\', 'u', '0', '0', '0', '0'},
  89         '\x01': {'\\', 'u', '0', '0', '0', '1'},
  90         '\x02': {'\\', 'u', '0', '0', '0', '2'},
  91         '\x03': {'\\', 'u', '0', '0', '0', '3'},
  92         '\x04': {'\\', 'u', '0', '0', '0', '4'},
  93         '\x05': {'\\', 'u', '0', '0', '0', '5'},
  94         '\x06': {'\\', 'u', '0', '0', '0', '6'},
  95         '\x07': {'\\', 'u', '0', '0', '0', '7'},
  96         '\x0b': {'\\', 'u', '0', '0', '0', 'b'},
  97         '\x0e': {'\\', 'u', '0', '0', '0', 'e'},
  98         '\x0f': {'\\', 'u', '0', '0', '0', 'f'},
  99         '\x10': {'\\', 'u', '0', '0', '1', '0'},
 100         '\x11': {'\\', 'u', '0', '0', '1', '1'},
 101         '\x12': {'\\', 'u', '0', '0', '1', '2'},
 102         '\x13': {'\\', 'u', '0', '0', '1', '3'},
 103         '\x14': {'\\', 'u', '0', '0', '1', '4'},
 104         '\x15': {'\\', 'u', '0', '0', '1', '5'},
 105         '\x16': {'\\', 'u', '0', '0', '1', '6'},
 106         '\x17': {'\\', 'u', '0', '0', '1', '7'},
 107         '\x18': {'\\', 'u', '0', '0', '1', '8'},
 108         '\x19': {'\\', 'u', '0', '0', '1', '9'},
 109         '\x1a': {'\\', 'u', '0', '0', '1', 'a'},
 110         '\x1b': {'\\', 'u', '0', '0', '1', 'b'},
 111         '\x1c': {'\\', 'u', '0', '0', '1', 'c'},
 112         '\x1d': {'\\', 'u', '0', '0', '1', 'd'},
 113         '\x1e': {'\\', 'u', '0', '0', '1', 'e'},
 114         '\x1f': {'\\', 'u', '0', '0', '1', 'f'},
 115 
 116         '\t': {'\\', 't'},
 117         '\f': {'\\', 'f'},
 118         '\b': {'\\', 'b'},
 119         '\r': {'\\', 'r'},
 120         '\n': {'\\', 'n'},
 121         '\\': {'\\', '\\'},
 122         '"':  {'\\', '"'},
 123     }
 124 
 125     if n := len(escapedStringBytes); n != 256 {
 126         t.Fatalf(`expected 256 entries, instead of %d`, n)
 127         return
 128     }
 129 
 130     for i, v := range escapedStringBytes {
 131         exp := []byte{byte(i)}
 132         if esc, ok := escaped[rune(i)]; ok {
 133             exp = esc
 134         }
 135 
 136         if !bytes.Equal(v, exp) {
 137             t.Fatalf("%d: expected %#v, got %#v", i, exp, v)
 138             return
 139         }
 140     }
 141 }

     File: tu/lines.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "crypto/md5"
   7     "crypto/sha1"
   8     "crypto/sha256"
   9     "crypto/sha512"
  10     "encoding/base64"
  11     "encoding/hex"
  12     "errors"
  13     "fmt"
  14     "hash"
  15     "io"
  16     "math"
  17     "mime"
  18     "os"
  19     "path/filepath"
  20     "regexp"
  21     "sort"
  22     "strconv"
  23     "strings"
  24     "unicode"
  25     "unicode/utf8"
  26 )
  27 
  28 func begin(w *bufio.Writer, r io.Reader, args []string) error {
  29     for _, s := range args {
  30         w.WriteString(s)
  31         if err := endLine(w); err != nil {
  32             return err
  33         }
  34     }
  35 
  36     return copyLines(w, r)
  37 }
  38 
  39 func blowTabs(w *bufio.Writer, r io.Reader, n int) error {
  40     return loopLines(r, func(line []byte) error {
  41         blowTabsLine(w, line, n)
  42         return endLine(w)
  43     })
  44 }
  45 
  46 func breathe(w *bufio.Writer, r io.Reader, n int) error {
  47     if n < 1 {
  48         return loopLines(r, func(line []byte) error {
  49             w.Write(line)
  50             return endLine(w)
  51         })
  52     }
  53 
  54     i := 0
  55 
  56     return loopLines(r, func(line []byte) error {
  57         if i%n == 0 && i > 0 {
  58             if err := endLine(w); err != nil {
  59                 return err
  60             }
  61         }
  62 
  63         i++
  64         w.Write(line)
  65         return endLine(w)
  66     })
  67 }
  68 
  69 func common(w *bufio.Writer, r io.Reader, args []string) error {
  70     src1, src2, err := loadSetPair(args, r)
  71     if err != nil {
  72         return err
  73     }
  74 
  75     got := make(map[string]struct{}, len(src1))
  76     for _, s := range src1 {
  77         got[s] = struct{}{}
  78     }
  79 
  80     for _, s := range src2 {
  81         if _, ok := got[s]; !ok {
  82             continue
  83         }
  84 
  85         w.WriteString(s)
  86         if err := endLine(w); err != nil {
  87             return err
  88         }
  89     }
  90 
  91     return nil
  92 }
  93 
  94 func crlf(w *bufio.Writer, line []byte) error {
  95     w.Write(line)
  96     w.WriteByte('\r')
  97     return endLine(w)
  98 }
  99 
 100 func dataURI(w *bufio.Writer, r io.Reader, args []string) error {
 101     for _, path := range args {
 102         if path == `-` {
 103             return errors.New(`standard input (-) not supported`)
 104         }
 105     }
 106 
 107     for _, path := range args {
 108         err := handleNamedInput(path, r, func(r io.Reader) error {
 109             kind := mime.TypeByExtension(filepath.Ext(path))
 110             if len(kind) == 0 {
 111                 return errors.New(path + `: can't guess a MIME type`)
 112             }
 113 
 114             w.WriteString(`data:`)
 115             w.WriteString(kind)
 116             w.WriteString(`;base64,`)
 117             return toBase64(w, r)
 118         })
 119 
 120         if err != nil {
 121             return err
 122         }
 123     }
 124 
 125     return nil
 126 }
 127 
 128 func dedup(w *bufio.Writer, r io.Reader) error {
 129     got := make(map[string]struct{})
 130 
 131     return loopLines(r, func(line []byte) error {
 132         s := string(line)
 133         if _, ok := got[s]; ok {
 134             return nil
 135         }
 136 
 137         got[s] = struct{}{}
 138         w.Write(line)
 139         return endLine(w)
 140     })
 141 }
 142 
 143 func dropEnd(w *bufio.Writer, r io.Reader, suffixes []string) error {
 144     return loopLinesString(r, func(line string) error {
 145         for _, s := range suffixes {
 146             line = strings.TrimSuffix(line, s)
 147         }
 148         w.WriteString(line)
 149         return endLine(w)
 150     })
 151 }
 152 
 153 func dropPunctuation(w *bufio.Writer, r io.Reader) error {
 154     return loopLines(r, func(line []byte) error {
 155         dropPunctuationLine(w, line)
 156         return endLine(w)
 157     })
 158 }
 159 
 160 func dropStart(w *bufio.Writer, r io.Reader, prefixes []string) error {
 161     return loopLinesString(r, func(line string) error {
 162         for _, s := range prefixes {
 163             line = strings.TrimPrefix(line, s)
 164         }
 165         w.WriteString(line)
 166         return endLine(w)
 167     })
 168 }
 169 
 170 func dropTabs(w *bufio.Writer, r io.Reader) error {
 171     return loopLines(r, func(line []byte) error {
 172         dropTabsLine(w, line)
 173         return endLine(w)
 174     })
 175 }
 176 
 177 // chopLF ignores the last byte, if it's a line-feed
 178 func chopLF(w *bufio.Writer, r io.Reader) error {
 179     i := 0
 180     return loopLines(r, func(line []byte) error {
 181         if i > 0 {
 182             if err := endLine(w); err != nil {
 183                 return err
 184             }
 185         }
 186 
 187         i++
 188         w.Write(line)
 189         return nil
 190     })
 191 }
 192 
 193 func end(w *bufio.Writer, r io.Reader, args []string) error {
 194     if err := copyLines(w, r); err != nil {
 195         return err
 196     }
 197     return writeLines(w, args)
 198 }
 199 
 200 func first(w *bufio.Writer, r io.Reader, n int) error {
 201     if n < 1 {
 202         return nil
 203     }
 204 
 205     return loopLines(r, func(line []byte) error {
 206         if n < 1 {
 207             return errNoMoreOutput
 208         }
 209 
 210         n--
 211         w.Write(line)
 212         return endLine(w)
 213     })
 214 }
 215 
 216 // func drop(w *bufio.Writer, r io.Reader, args []string) error {
 217 //  return loopLinesString(r, func(line string) error {
 218 //      for _, what := range args {
 219 //          line = strings.ReplaceAll(line, what, ``)
 220 //      }
 221 //      w.WriteString(line)
 222 //      return endLine(w)
 223 //  })
 224 // }
 225 
 226 func drop(w *bufio.Writer, r io.Reader, args []string) error {
 227     if len(args) == 0 {
 228         return copyLines(w, r)
 229     }
 230 
 231     if len(args) == 1 {
 232         return dropString(w, r, []byte(args[0]))
 233     }
 234 
 235     var bb1, bb2 bytes.Buffer
 236     avoid := make([][]byte, 0, len(args))
 237     for _, s := range args {
 238         avoid = append(avoid, []byte(s))
 239     }
 240 
 241     return loopLines(r, func(line []byte) error {
 242         src := &bb1
 243         dest := &bb2
 244         src.Reset()
 245         src.Write(line)
 246 
 247         for _, what := range avoid {
 248             s := src.Bytes()
 249             dest.Reset()
 250 
 251             for len(s) > 0 {
 252                 i := bytes.Index(s, what)
 253                 if i < 0 {
 254                     dest.Write(s)
 255                     break
 256                 }
 257 
 258                 dest.Write(s[:i])
 259                 s = s[i+len(what):]
 260             }
 261 
 262             src, dest = dest, src
 263         }
 264 
 265         // any loop results in a buffer-swap, so the final result always
 266         // ends in the `src` buffer, confusingly
 267         w.Write(src.Bytes())
 268         return endLine(w)
 269     })
 270 }
 271 
 272 // dropString handles the 1-argument case for func drop more efficiently,
 273 // by avoiding copying data into swappable byte-buffer pairs
 274 func dropString(w *bufio.Writer, r io.Reader, what []byte) error {
 275     return loopLines(r, func(line []byte) error {
 276         for len(line) > 0 {
 277             i := bytes.Index(line, what)
 278             if i < 0 {
 279                 w.Write(line)
 280                 break
 281             }
 282 
 283             w.Write(line[:i])
 284             line = line[i+len(what):]
 285         }
 286 
 287         return endLine(w)
 288     })
 289 }
 290 
 291 func glue(w *bufio.Writer, r io.Reader, args []string) error {
 292     sep, err := optionalStringArg(args, ``)
 293     if err != nil {
 294         return err
 295     }
 296 
 297     i := 0
 298     err = loopLines(r, func(line []byte) error {
 299         if i > 0 {
 300             w.WriteString(sep)
 301         }
 302         i++
 303         w.Write(line)
 304         return nil
 305     })
 306 
 307     if err != nil {
 308         return err
 309     }
 310 
 311     if i > 0 {
 312         return endLine(w)
 313     }
 314     return nil
 315 }
 316 
 317 func indent(w *bufio.Writer, r io.Reader, n int) error {
 318     return loopLines(r, func(line []byte) error {
 319         writeSpaces(w, n)
 320         w.Write(line)
 321         return endLine(w)
 322     })
 323 }
 324 
 325 func join(w *bufio.Writer, r io.Reader, args []string) error {
 326     if len(args) > 1 {
 327         return fmt.Errorf(`multiple arguments not supported`)
 328     }
 329 
 330     sep := "\t"
 331     if len(args) == 1 {
 332         sep = args[0]
 333     }
 334 
 335     i := 0
 336     err := loopLines(r, func(line []byte) error {
 337         if i > 0 {
 338             _, err := w.WriteString(sep)
 339             if err := adaptWriteError(err); err != nil {
 340                 return err
 341             }
 342         }
 343         i++
 344 
 345         w.Write(line)
 346         return nil
 347     })
 348 
 349     if err != nil {
 350         return err
 351     }
 352 
 353     if i > 0 {
 354         return endLine(w)
 355     }
 356     return nil
 357 }
 358 
 359 func last(w *bufio.Writer, r io.Reader, n int) error {
 360     if n < 1 {
 361         return nil
 362     }
 363 
 364     if rs, ok := r.(io.ReadSeeker); ok && rs != os.Stdin {
 365         if err := seekLastLinesApprox(rs, n); err != nil {
 366             return err
 367         }
 368     }
 369 
 370     if n == 1 {
 371         return lastLine(w, r)
 372     }
 373 
 374     latest := newStringRing(n)
 375     err := loopLines(r, func(line []byte) error {
 376         latest.Bring(string(line))
 377         return nil
 378     })
 379 
 380     if err != nil {
 381         return err
 382     }
 383 
 384     first, second := latest.Halves()
 385     if err := writeLines(w, first); err != nil {
 386         return err
 387     }
 388     return writeLines(w, second)
 389 }
 390 
 391 // lastLine handles the special 1-line case for func last more efficiently,
 392 // as it doesn't involve copying things into ring-buffers
 393 func lastLine(w *bufio.Writer, r io.Reader) error {
 394     var last []byte
 395     err := loopLines(r, func(line []byte) error {
 396         last = line
 397         return nil
 398     })
 399 
 400     if err != nil {
 401         return err
 402     }
 403 
 404     w.Write(last)
 405     return endLine(w)
 406 }
 407 
 408 // seekLastLinesApprox positions a reader `approximately` where the last n
 409 // lines start; the position `sought` is never after the start of the first
 410 // of those trailing lines
 411 func seekLastLinesApprox(rs io.ReadSeeker, n int) error {
 412     if n < 1 {
 413         return nil
 414     }
 415 
 416     first := true
 417     var buf [bufferSize]byte
 418 
 419     return loopChunksBackward(rs, buf[:], func(pos int, chunk []byte) (keepGoing bool) {
 420         // handle trailing line-feed by effectively ignoring it
 421         if first {
 422             first = false
 423             if len(chunk) > 0 && chunk[len(chunk)-1] == '\n' {
 424                 n++
 425             }
 426         }
 427 
 428         c := bytes.Count(chunk, []byte{'\n'})
 429         if n >= c {
 430             n -= c
 431             return true
 432         }
 433 
 434         if n < 1 {
 435             // first of the trailing lines may start in a previous chunk
 436             return bytes.IndexByte(chunk, '\n') < 0
 437         }
 438         // search ended in this chunk
 439         return false
 440     })
 441 }
 442 
 443 func lines(w *bufio.Writer, r io.Reader, args []string) error {
 444     return handleNamedInputs(args, r, func(path string, r io.Reader) error {
 445         return copyLines(w, r)
 446     })
 447 }
 448 
 449 func lineUp(w *bufio.Writer, r io.Reader, perLine int) error {
 450     if perLine < 1 {
 451         return lineAllUp(w, r)
 452     }
 453 
 454     i := 0
 455     err := loopLines(r, func(line []byte) error {
 456         if i > 0 {
 457             if i%perLine != 0 {
 458                 w.WriteByte('\t')
 459             } else {
 460                 if err := endLine(w); err != nil {
 461                     return err
 462                 }
 463             }
 464         }
 465         i++
 466 
 467         w.Write(line)
 468         return nil
 469     })
 470 
 471     if err != nil {
 472         return err
 473     }
 474 
 475     if i > 0 {
 476         return endLine(w)
 477     }
 478     return nil
 479 }
 480 
 481 func lineAllUp(w *bufio.Writer, r io.Reader) error {
 482     i := 0
 483     err := loopLines(r, func(line []byte) error {
 484         if i > 0 {
 485             w.WriteByte('\t')
 486         }
 487         i++
 488         w.Write(line)
 489         return nil
 490     })
 491 
 492     if err != nil {
 493         return err
 494     }
 495 
 496     if i > 0 {
 497         return endLine(w)
 498     }
 499     return nil
 500 }
 501 
 502 func links(w *bufio.Writer, r io.Reader) error {
 503     return loopLines(r, func(line []byte) error {
 504         var err error
 505         loopLinks(line, func(i int, s []byte) (keepGoing bool) {
 506             w.Write(s)
 507             err = endLine(w)
 508             return err == nil
 509         })
 510         return err
 511     })
 512 }
 513 
 514 func lower(w *bufio.Writer, line []byte) error {
 515     for len(line) > 0 {
 516         r, size := utf8.DecodeRune(line)
 517         r = unicode.ToLower(r)
 518         w.WriteRune(r)
 519         line = line[size:]
 520     }
 521     return endLine(w)
 522 }
 523 
 524 func matchParagraphsFold(w *bufio.Writer, r io.Reader, args []string) error {
 525     // no expressions means match all paragraphs
 526     if len(args) == 0 {
 527         return stomp(w, r)
 528     }
 529 
 530     // turn arguments into case-insensitive regexes
 531     var match []*regexp.Regexp
 532     for _, s := range args {
 533         if !strings.HasPrefix(s, `(?i)`) {
 534             s = `(?i)` + s
 535         }
 536 
 537         m, err := regexp.Compile(s)
 538         if err != nil {
 539             return err
 540         }
 541         match = append(match, m)
 542     }
 543 
 544     shown := 0
 545     didMatch := false
 546     var par bytes.Buffer
 547 
 548     err := loopLines(r, func(line []byte) error {
 549         if len(bytes.TrimSpace(line)) == 0 {
 550             if !didMatch {
 551                 par.Reset()
 552                 return nil
 553             }
 554 
 555             if par.Len() == 0 {
 556                 didMatch = false
 557                 return nil
 558             }
 559 
 560             if shown > 0 {
 561                 w.WriteByte('\n')
 562             }
 563             _, err := w.Write(par.Bytes())
 564 
 565             shown++
 566             par.Reset()
 567             didMatch = false
 568             return adaptWriteError(err)
 569         }
 570 
 571         // remember all paragraph lines, even if no match has happened for
 572         // it yet, since a match can happen on its last line, for example
 573         par.Write(line)
 574         par.WriteByte('\n')
 575 
 576         // no need to test later lines from already-matched paragraphs
 577         if didMatch {
 578             return nil
 579         }
 580 
 581         for _, m := range match {
 582             if m.Match(line) {
 583                 didMatch = true
 584                 return nil
 585             }
 586         }
 587         return nil
 588     })
 589 
 590     if err != nil {
 591         return err
 592     }
 593 
 594     // don't forget to show the last matched paragraph
 595     if !didMatch || par.Len() == 0 {
 596         return nil
 597     }
 598 
 599     if shown > 0 {
 600         w.WriteByte('\n')
 601     }
 602     _, err = w.Write(par.Bytes())
 603     return adaptWriteError(err)
 604 }
 605 
 606 func md5Hex(w *bufio.Writer, r io.Reader) error {
 607     return hashHex(w, r, md5.New())
 608 }
 609 
 610 func missing(w *bufio.Writer, r io.Reader, args []string) error {
 611     src1, src2, err := loadSetPair(args, r)
 612     if err != nil {
 613         return err
 614     }
 615 
 616     got := make(map[string]struct{}, len(src1))
 617     for _, s := range src1 {
 618         got[s] = struct{}{}
 619     }
 620 
 621     for _, s := range src2 {
 622         if _, ok := got[s]; ok {
 623             continue
 624         }
 625 
 626         w.WriteString(s)
 627         if err := endLine(w); err != nil {
 628             return err
 629         }
 630     }
 631 
 632     return nil
 633 }
 634 
 635 func numbers(w *bufio.Writer, r io.Reader) error {
 636     return loopLines(r, func(line []byte) error {
 637         var err error
 638         loopNumbers(line, func(i int, s []byte) (keepGoing bool) {
 639             w.Write(s)
 640             err = endLine(w)
 641             return err == nil
 642         })
 643         return err
 644     })
 645 }
 646 
 647 func null(w *bufio.Writer, r io.Reader) error {
 648     return nil
 649 }
 650 
 651 func numberLines(w *bufio.Writer, r io.Reader, n int) error {
 652     return loopLines(r, func(line []byte) error {
 653         n++
 654         writeInt64(w, int64(n-1))
 655         w.WriteByte('\t')
 656         w.Write(line)
 657         return endLine(w)
 658     })
 659 }
 660 
 661 func primes(w *bufio.Writer, r io.Reader, count int) error {
 662     // 2 is the only even prime, and the smallest one
 663     if count > 0 {
 664         w.WriteString(`2`)
 665         if err := endLine(w); err != nil {
 666             return err
 667         }
 668         count--
 669     }
 670 
 671     for n := 3; count > 0; n += 2 {
 672         if isOddDiv(n) {
 673             continue
 674         }
 675 
 676         var buf [32]byte
 677         w.Write(strconv.AppendInt(buf[:0], int64(n), 10))
 678         if err := endLine(w); err != nil {
 679             return err
 680         }
 681         count--
 682     }
 683 
 684     return nil
 685 }
 686 
 687 // isOddDiv helps func primes do its job efficiently
 688 func isOddDiv(n int) bool {
 689     max := int(math.Sqrt(float64(n)))
 690 
 691     for div := 3; div <= max; div += 2 {
 692         if n%div == 0 {
 693             return true
 694         }
 695     }
 696 
 697     return false
 698 }
 699 
 700 func reflow(w *bufio.Writer, r io.Reader, max int) error {
 701     return loopLines(r, func(line []byte) error {
 702         reflowLine(w, line, max)
 703         return endLine(w)
 704     })
 705 }
 706 
 707 func runes(w *bufio.Writer, r io.Reader) error {
 708     return loopLines(r, func(line []byte) error {
 709         for len(line) > 0 {
 710             r, size := utf8.DecodeRune(line)
 711             line = line[size:]
 712 
 713             w.WriteRune(r)
 714             if err := endLine(w); err != nil {
 715                 return err
 716             }
 717         }
 718 
 719         return nil
 720     })
 721 }
 722 
 723 func sha1Hex(w *bufio.Writer, r io.Reader) error {
 724     return hashHex(w, r, sha1.New())
 725 }
 726 
 727 func sha256Hex(w *bufio.Writer, r io.Reader) error {
 728     return hashHex(w, r, sha256.New())
 729 }
 730 
 731 func sha512Hex(w *bufio.Writer, r io.Reader) error {
 732     return hashHex(w, r, sha512.New())
 733 }
 734 
 735 func skip(w *bufio.Writer, r io.Reader, n int) error {
 736     return loopLines(r, func(line []byte) error {
 737         if n > 0 {
 738             n--
 739             return nil
 740         }
 741 
 742         w.Write(line)
 743         return endLine(w)
 744     })
 745 }
 746 
 747 func skipEmpty(w *bufio.Writer, line []byte) error {
 748     if len(line) == 0 {
 749         return nil
 750     }
 751 
 752     w.Write(line)
 753     return endLine(w)
 754 }
 755 
 756 func skipLast(w *bufio.Writer, r io.Reader, n int) error {
 757     if n < 1 {
 758         return copyLines(w, r)
 759     }
 760 
 761     latest := newStringRing(n)
 762     return loopLines(r, func(line []byte) error {
 763         if latest.Len() < n {
 764             latest.Bring(string(line))
 765             return nil
 766         }
 767 
 768         w.WriteString(latest.Earliest())
 769         latest.Bring(string(line))
 770         return endLine(w)
 771     })
 772 }
 773 
 774 func squeeze(w *bufio.Writer, line []byte) error {
 775     line = trimSpaces(line)
 776 
 777     for len(line) > 0 {
 778         i := bytes.IndexByte(line, '\t')
 779         if i < 0 {
 780             squeezeChunk(w, line)
 781             break
 782         }
 783 
 784         squeezeChunk(w, line[:i])
 785         w.WriteByte('\t')
 786         line = line[i+1:]
 787     }
 788 
 789     return endLine(w)
 790 }
 791 
 792 func stomp(w *bufio.Writer, r io.Reader) error {
 793     empty := 0
 794     nonEmpty := 0
 795 
 796     return loopLines(r, func(line []byte) error {
 797         if len(line) == 0 {
 798             empty++
 799             return nil
 800         }
 801 
 802         if empty > 0 && nonEmpty > 0 {
 803             if err := endLine(w); err != nil {
 804                 return err
 805             }
 806         }
 807 
 808         empty = 0
 809         nonEmpty++
 810         w.Write(line)
 811         return endLine(w)
 812     })
 813 }
 814 
 815 func tally(w *bufio.Writer, r io.Reader) error {
 816     tally := make(map[string]int)
 817 
 818     err := loopLines(r, func(line []byte) error {
 819         tally[string(line)]++
 820         return nil
 821     })
 822 
 823     if err != nil {
 824         return err
 825     }
 826 
 827     sorted := make([]string, 0, len(tally))
 828     for k := range tally {
 829         sorted = append(sorted, k)
 830     }
 831 
 832     // reverse-sort keys by their tally-count
 833     sort.SliceStable(sorted, func(i, j int) bool {
 834         return tally[sorted[i]] > tally[sorted[j]]
 835     })
 836 
 837     for _, k := range sorted {
 838         writeInt64(w, int64(tally[k]))
 839         w.WriteByte('\t')
 840         w.WriteString(k)
 841         if err := endLine(w); err != nil {
 842             return err
 843         }
 844     }
 845     return nil
 846 }
 847 
 848 func uriEncode(w *bufio.Writer, line []byte) error {
 849     // s := url.PathEscape(string(line))
 850     // w.WriteString(s)
 851     // return endLine(w)
 852 
 853     for len(line) > 0 {
 854         r, size := utf8.DecodeRune(line)
 855         line = line[size:]
 856 
 857         if r < 128 && uriUnescapedASCII[r] {
 858             w.WriteByte(byte(r))
 859             continue
 860         }
 861 
 862         const hex = `0123456789ABCDEF`
 863         const l = byte(len(hex))
 864         w.WriteByte('%')
 865         w.WriteByte(hex[byte(r)/l])
 866         w.WriteByte(hex[byte(r)%l])
 867     }
 868 
 869     return endLine(w)
 870 }
 871 
 872 // toBase64 is named to avoid clashing with package `base64`
 873 func toBase64(w *bufio.Writer, r io.Reader) error {
 874     enc := base64.NewEncoder(base64.StdEncoding, w)
 875     if err := copyBytes(enc, r); err != nil {
 876         return err
 877     }
 878 
 879     // can't defer-call this, as it must happen before ending the line
 880     enc.Close()
 881     return endLine(w)
 882 }
 883 
 884 // toHex is named to avoid clashing with package `hex`
 885 func toHex(w *bufio.Writer, r io.Reader) error {
 886     enc := hex.NewEncoder(w)
 887     if err := copyBytes(enc, r); err != nil {
 888         return err
 889     }
 890     return endLine(w)
 891 }
 892 
 893 // hashHex hash-encodes data as a hex-ASCII line
 894 func hashHex(w *bufio.Writer, r io.Reader, h hash.Hash) error {
 895     err := hashBytes(hex.NewEncoder(w), r, h)
 896     if err != nil {
 897         return err
 898     }
 899     return endLine(w)
 900 }

     File: tu/lines_test.go
   1 package main
   2 
   3 import (
   4     "fmt"
   5     "strconv"
   6     "strings"
   7     "testing"
   8 )
   9 
  10 func TestPureLineTools(t *testing.T) {
  11     var tests = []struct {
  12         Tool     string
  13         Input    string
  14         Expected string
  15     }{
  16         {`squeeze`, ``, ``},
  17         {`squeeze`, `  `, "\n"},
  18         {`squeeze`, `abc  def`, "abc def\n"},
  19         {`squeeze`, "abc \t def", "abc\tdef\n"},
  20         {`trim`, ``, ``},
  21         {`trim`, `  `, "\n"},
  22         {`trim`, `abc  def`, "abc  def\n"},
  23         {`trim`, "abc \t def", "abc \t def\n"},
  24     }
  25 
  26     for i, tc := range tests {
  27         t.Run(strconv.Itoa(i), func(t *testing.T) {
  28             var sb strings.Builder
  29 
  30             err := run(&sb, strings.NewReader(tc.Input), tc.Tool, nil)
  31             if err != nil {
  32                 t.Fatal(err)
  33                 return
  34             }
  35 
  36             got := sb.String()
  37             if got != tc.Expected {
  38                 t.Fatalf("got\n%q\ninstead of\n%q", got, tc.Expected)
  39                 return
  40             }
  41         })
  42     }
  43 }
  44 
  45 func TestLineIntTools(t *testing.T) {
  46     var tests = []struct {
  47         Tool     string
  48         Input    string
  49         Arg      int
  50         Expected string
  51     }{
  52         {`blow`, ``, 3, ``},
  53         {`blow`, ``, 0, ``},
  54         {`blow`, ``, -3, ``},
  55         {`blow`, `abc def`, 3, "abc def\n"},
  56         {`blow`, "abc\tdef", 3, "abc   def\n"},
  57         {`blow`, "abc\tdef", 4, "abc def\n"},
  58         {`blow`, "abc\tdef", 0, "abcdef\n"},
  59         {`blow`, "abc\tdef", -1, "abcdef\n"},
  60 
  61         {`indent`, ``, 3, ``},
  62         {`indent`, "abc\ndef", 3, "   abc\n   def\n"},
  63         {`indent`, "abc\ndef", 0, "abc\ndef\n"},
  64         {`indent`, "abc\ndef", -10, "abc\ndef\n"},
  65 
  66         {`dedent`, "   abc\n   def\n", 3, "abc\ndef\n"},
  67         {`dedent`, "   abc\n   def\n", 1, "  abc\n  def\n"},
  68         {`dedent`, "   abc\n   def\n", 0, "   abc\n   def\n"},
  69         {`dedent`, "   abc\n   def\n", -5, "   abc\n   def\n"},
  70 
  71         {`last`, "abc\ndef\n", -5, ``},
  72         {`last`, "abc\ndef\n", 0, ``},
  73         {`last`, "abc\ndef\n123\n456\n", 3, "def\n123\n456\n"},
  74         {`last`, "abc\ndef\n123\n456\n", 1, "456\n"},
  75         {`last`, "abc\ndef\n123\n456\n", 2, "123\n456\n"},
  76         {`last`, "abc\ndef\n123\n456\n", 200, "abc\ndef\n123\n456\n"},
  77         {`last`, "abc\ndef\n", -5, ``},
  78 
  79         {`skip-last`, "abc\ndef\n", 0, "abc\ndef\n"},
  80         {`skip-last`, "abc\ndef\n123\n456\n", -20, "abc\ndef\n123\n456\n"},
  81         {`skip-last`, "abc\ndef\n123\n456\n", 4, ``},
  82         {`skip-last`, "abc\ndef\n123\n456\n", 3, "abc\n"},
  83         {`skip-last`, "abc\ndef\n123\n456\n", 1, "abc\ndef\n123\n"},
  84         {`skip-last`, "abc\ndef\n123\n456\n", 2, "abc\ndef\n"},
  85         {`skip-last`, "abc\ndef\n123\n456\n", 200, ``},
  86     }
  87 
  88     for _, tc := range tests {
  89         name := fmt.Sprintf(`%s(%d): %q`, tc.Tool, tc.Arg, tc.Input)
  90 
  91         t.Run(name, func(t *testing.T) {
  92             var out strings.Builder
  93             args := []string{strconv.Itoa(tc.Arg)}
  94 
  95             err := run(&out, strings.NewReader(tc.Input), tc.Tool, args)
  96             if err != nil {
  97                 t.Fatal(err)
  98                 return
  99             }
 100 
 101             got := out.String()
 102             if got != tc.Expected {
 103                 t.Fatalf("got\n%q\ninstead of\n%q", got, tc.Expected)
 104                 return
 105             }
 106         })
 107     }
 108 }

     File: tu/main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "os"
   7     "sort"
   8 
   9     _ "embed"
  10 )
  11 
// examples has the contents of file examples.sh, embedded at build time;
// func showExamples emits it as is
//
//go:embed examples.sh
var examples string

// info has the contents of file info.txt, embedded at build time; it's the
// first part of what func showHelp emits
//
//go:embed info.txt
var info string

// tldr has the contents of file tldr.sh, embedded at build time; func
// showTLDR emits it as is
//
//go:embed tldr.sh
var tldr string

// tools has the contents of file tools.txt, embedded at build time; it's
// the last part of what func showHelp emits
//
//go:embed tools.txt
var tools string

// errGeneric's message isn't meant to show, opting to quit the app with a
// generic error code instead; it's used when other errors were shown before
var errGeneric = errors.New(`failed`)
  27 
  28 func main() {
  29     addMetaTools()
  30 
  31     if len(os.Args) < 2 {
  32         run(os.Stderr, os.Stdin, `help`, nil)
  33         showError(errors.New(`no tool name given`))
  34         os.Exit(1)
  35     }
  36 
  37     switch os.Args[1] {
  38     case `-h`, `--h`, `-help`, `--help`:
  39         run(os.Stdout, os.Stdin, `help`, nil)
  40         return
  41     }
  42 
  43     err := run(os.Stdout, os.Stdin, os.Args[1], os.Args[2:])
  44     if errors.Is(err, errGeneric) {
  45         os.Exit(1)
  46     }
  47     if err != nil {
  48         showError(err)
  49         os.Exit(1)
  50     }
  51 }
  52 
  53 func aliases(w *bufio.Writer) error {
  54     aliases := make(map[string][]string)
  55     for k, v := range toolNameAliases {
  56         aliases[v] = append(aliases[v], k)
  57     }
  58 
  59     keys := make([]string, 0, len(aliases))
  60     for k := range aliases {
  61         keys = append(keys, k)
  62         sort.Strings(aliases[k])
  63     }
  64     sort.Strings(keys)
  65 
  66     for _, k := range keys {
  67         for i, v := range aliases[k] {
  68             if i > 0 {
  69                 w.WriteByte('\t')
  70             }
  71             w.WriteString(v)
  72         }
  73 
  74         if err := endLine(w); err != nil {
  75             return err
  76         }
  77     }
  78 
  79     return nil
  80 }
  81 
  82 func showExamples(w *bufio.Writer) error {
  83     w.WriteString(examples)
  84     return nil
  85 }
  86 
  87 func showHelp(w *bufio.Writer) error {
  88     w.WriteString(info)
  89     w.WriteString("\n\nTools Available\n\n\n")
  90     w.WriteString(tools)
  91     return nil
  92 }
  93 
  94 func showTLDR(w *bufio.Writer) error {
  95     w.WriteString(tldr)
  96     return nil
  97 }
  98 
  99 // showError standardizes how errors from this app look
 100 func showError(err error) {
 101     if err == nil {
 102         return
 103     }
 104 
 105     os.Stderr.WriteString("\x1b[31m")
 106     os.Stderr.WriteString(err.Error())
 107     os.Stderr.WriteString("\x1b[0m\n")
 108 }

     File: tu/mit-license.txt
   1 The MIT License (MIT)
   2 
   3 Copyright © 2024 pacman64
   4 
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the “Software”), to deal
   7 in the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11 
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14 
  15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.

     File: tu/other.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "os/exec"
   8     "path/filepath"
   9     "runtime"
  10     "strings"
  11     "time"
  12 )
  13 
  14 // open tries to open/show/start all files/folder/URIs given, even other apps
  15 func openTool(w *bufio.Writer, r io.Reader, names []string) error {
  16     if len(names) == 0 || (len(names) == 1 && names[0] == `-`) {
  17         return errors.New(`expected files/folders/URIs`)
  18     }
  19 
  20     for _, s := range names {
  21         if err := popupName(s); err != nil {
  22             return err
  23         }
  24     }
  25     return nil
  26 }
  27 
  28 // popupName is used by func open to flatten its control-flow
  29 func popupName(s string) error {
  30     if f := strings.HasPrefix; f(s, `https://`) || f(s, `http://`) {
  31         return popup(s)
  32     }
  33 
  34     if strings.HasPrefix(s, `:`) {
  35         _, err := parsePortNumber(s[1:])
  36         if err == nil {
  37             return popup(`http://127.0.0.1` + s)
  38         }
  39     }
  40 
  41     s, err := filepath.Abs(s)
  42     if err != nil {
  43         return err
  44     }
  45     return popup(s)
  46 }
  47 
  48 // popup tries to open the file/folder/URI given using the system default
  49 // handlers for these
  50 func popup(what string) error {
  51     switch runtime.GOOS {
  52     case `windows`:
  53         const how = `url.dll,FileProtocolHandler`
  54         return exec.Command(`rundll32`, how, what).Run()
  55     case `darwin`:
  56         return exec.Command(`open`, what).Run()
  57     default:
  58         return exec.Command(`xdg-open`, what).Run()
  59     }
  60 }
  61 
  62 func today(w *bufio.Writer) error {
  63     var buf [32]byte
  64     const fmt = `2006-01-02 Mon Jan 02`
  65     w.Write(time.Now().AppendFormat(buf[:0], fmt))
  66     return endLine(w)
  67 }
  68 
  69 func now(w *bufio.Writer) error {
  70     var buf [32]byte
  71     const fmt = `2006-01-02 15:04:05 Jan Mon`
  72     w.Write(time.Now().AppendFormat(buf[:0], fmt))
  73     return endLine(w)
  74 }
  75 
  76 func ymd(w *bufio.Writer) error {
  77     var buf [16]byte
  78     w.Write(time.Now().AppendFormat(buf[:0], `2006-01-02`))
  79     return endLine(w)
  80 }

     File: tu/overview.txt
   1 # General
   2 
   3 This command-line app brings many tools, tiny and/or useful, into one. This
   4 approach consolidates what would otherwise be a smattering of files, which
   5 is always nice, and even saves file-space, as Go apps each carry a file-size
   6 overhead of megabytes.
   7 
   8 The first argument given is looked-up as a tool name, or one of their many
   9 aliases. Tools are implemented as simple funcs, with their specific types
  10 determining how exactly they're called.
  11 
  12 Tool names/aliases are always lower-cased, and any dashes/underscores given
  13 in them are ignored, except for `--` and `-`, which are hard-coded aliases
  14 for the `compose` tool.
  15 
  16 
  17 # io.go
  18 
  19 Source file io.go standardizes how this app's tools read input, and how they
  20 emit output, via various funcs and types.
  21 
  22 
  23 # main.go
  24 
  25 Source file main.go defines the app's starting point, and a few help-related
  26 tools/variables.
  27 
  28 
  29 # running.go
  30 
  31 Source file running.go has all tool-lookup tables, as well as the tool-name
  32 aliases. That file also defines higher-order tools, which run/call other
  33 tools, the main examples being the `compose` and `each` tools.
  34 
  35 Func `run` does the lookup, delegating the actual running to `dispatchFunc`,
  36 the latter choosing its exact behavior based on the type of the looked-up
  37 func: some tool funcs are called for each input line, while other funcs are
  38 simply given an io.Reader for them to handle, for example.
  39 
  40 
  41 # strings.go
  42 
  43 Source file strings.go defines various string-related funcs for tools to use,
  44 along with a few string-related types, such as the custom ring-buffer type
  45 named `stringRing`.
  46 
  47 
  48 # Other Source Files
  49 
  50 bytes.go       byte-slice-oriented tools, whether binary or line-based
  51 coby.go        `COunt BYtes`
  52 examples.sh    shell-runnable examples shown by tool `examples`
  53 fractions.go   `fractions`, an arbitrary-precision rational calculator
  54 id3pic.go      `id3-pic`, a thumbnail-picture extraction tool
  55 info.txt       part of the help message
  56 json0.go       `json-0` and `jsonl`, 2 useful JSON fixers/squeezers
  57 lines.go       string-oriented tools, mostly line-based
  58 other.go       argument-oriented tools which don't read input
  59 plain.go       `plain`
  60 sbs.go         `Side By Side`
  61 symbols.go     `symbols`, along with the long lookup-tables it uses
  62 tables.go      tools about tabular formats, like CSV and TSV
  63 tldr.sh        pseudo-examples shown by tool `tldr`
  64 tools.txt      descriptions of most tools, shown with the help message
  65 utf8.go        `utf-8`

     File: tu/plain.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "io"
   7 )
   8 
   9 // plainState is a custom type used in func plain and its helper funcs
  10 type plainState int
  11 
  12 const (
  13     plainNormal = plainState(0)
  14     plainEscape = plainState(1)
  15     plainANSI   = plainState(2)
  16 )
  17 
  18 // plain ignores all ANSI-style sequences
  19 func plain(w *bufio.Writer, r io.Reader) error {
  20     state := plainNormal
  21     var buf [bufferSize]byte
  22 
  23     for {
  24         n, err := r.Read(buf[:])
  25 
  26         if n < 1 {
  27             if state == plainEscape {
  28                 w.WriteByte('\x1b')
  29             }
  30 
  31             if err == io.EOF {
  32                 return nil
  33             }
  34             return err
  35         }
  36 
  37         chunk := buf[:n]
  38 
  39         // special-handle chunks where there can't be any ANSI-sequences:
  40         // bytes.IndexByte is a really quick check, and when it can't find
  41         // escape-bytes, whole chunks can be bulk-copied into the output
  42         if state == plainNormal {
  43             if i := bytes.IndexByte(chunk, '\x1b'); i < 0 {
  44                 w.Write(chunk)
  45                 continue
  46             }
  47         }
  48 
  49         for len(chunk) > 0 {
  50             switch state {
  51             case plainNormal:
  52                 chunk, state = plainHandleNormal(w, chunk)
  53             case plainEscape:
  54                 chunk, state = plainHandleEscape(w, chunk)
  55             case plainANSI:
  56                 chunk, state = plainHandleANSI(chunk)
  57             }
  58         }
  59     }
  60 }
  61 
  62 // plainHandleNormal is used by func plain
  63 func plainHandleNormal(w *bufio.Writer, chunk []byte) ([]byte, plainState) {
  64     for len(chunk) > 0 {
  65         b := chunk[0]
  66         chunk = chunk[1:]
  67 
  68         if b == '\x1b' {
  69             return chunk, plainEscape
  70         } else {
  71             w.WriteByte(b)
  72         }
  73     }
  74 
  75     return nil, plainNormal
  76 }
  77 
  78 // plainHandleEscape is used by func plain
  79 func plainHandleEscape(w *bufio.Writer, chunk []byte) ([]byte, plainState) {
  80     if len(chunk) > 0 {
  81         b := chunk[0]
  82         chunk = chunk[1:]
  83 
  84         if b == '[' {
  85             return chunk, plainANSI
  86         } else {
  87             w.WriteByte('\x1b')
  88             w.WriteByte(b)
  89             return chunk, plainNormal
  90         }
  91     }
  92 
  93     return nil, plainEscape
  94 }
  95 
  96 // plainHandleANSI is used by func plain
  97 func plainHandleANSI(chunk []byte) ([]byte, plainState) {
  98     for len(chunk) > 0 {
  99         b := chunk[0]
 100         chunk = chunk[1:]
 101 
 102         // turn lowercase ASCII letters into uppercase ones
 103         b &= ^byte(32)
 104 
 105         if 'A' <= b && b <= 'Z' {
 106             return chunk, plainNormal
 107         }
 108     }
 109 
 110     return nil, plainANSI
 111 }

     File: tu/running.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "fmt"
   7     "io"
   8     "strconv"
   9     "strings"
  10 )
  11 
// fallbackIntTool pairs a tool func which takes an int argument with the
// value to use for that argument when none is given explicitly; its method
// run picks between that default and an explicitly-given value
type fallbackIntTool struct {
    // tool is the func to run
    tool func(w *bufio.Writer, r io.Reader, n int) error

    // n is the default int argument to use, when none was given explicitly
    n int
}
  19 
  20 func (fit fallbackIntTool) run(w *bufio.Writer, r io.Reader, args []string) error {
  21     switch len(args) {
  22     case 0:
  23         return fit.tool(w, r, fit.n)
  24 
  25     case 1:
  26         n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``))
  27         if err != nil {
  28             const fs = `expected an integer argument, but was given %s`
  29             return fmt.Errorf(fs, args[0])
  30         }
  31         return fit.tool(w, r, n)
  32 
  33     case 2:
  34         return handleNamedInput(args[1], r, func(r io.Reader) error {
  35             n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``))
  36             if err != nil {
  37                 const fs = `expected an integer argument, but was given %s`
  38                 return fmt.Errorf(fs, args[0])
  39             }
  40             return fit.tool(w, r, n)
  41         })
  42 
  43     default:
  44         const fs = `expected at most 1 argument, but was given %d`
  45         return fmt.Errorf(fs, len(args))
  46     }
  47 }
  48 
  49 // toolNameAliases translates synonyms into proper keys in table name2tool;
  50 // unit-tests force it to have self-aliases, as well as entries for all tools
  51 var toolNameAliases = map[string]string{
  52     `after`:          `after`,
  53     `afterlast`:      `afterlast`,
  54     `pastlast`:       `afterlast`,
  55     `aliases`:        `aliases`,
  56     `base64`:         `base64`,
  57     `before`:         `before`,
  58     `beforelast`:     `beforelast`,
  59     `begin`:          `begin`,
  60     `begincsv`:       `begincsv`,
  61     `begintsv`:       `begintsv`,
  62     `bigfiles`:       `bigfiles`,
  63     `blow`:           `blow`,
  64     `expand`:         `blow`,
  65     `book`:           `book`,
  66     `br`:             `breathe`,
  67     `breathe`:        `breathe`,
  68     `bytes`:          `bytes`,
  69     `cat`:            `bytes`,
  70     `choplf`:         `choplf`,
  71     `common`:         `common`,
  72     `compose`:        `compose`,
  73     `intersection`:   `common`,
  74     `coby`:           `countbytes`,
  75     `countbytes`:     `countbytes`,
  76     `crlf`:           `crlf`,
  77     `doslines`:       `crlf`,
  78     `windowslines`:   `crlf`,
  79     `winlines`:       `crlf`,
  80     `csv`:            `csv`,
  81     `csv2tsv`:        `csv2tsv`,
  82     `csvtotsv`:       `csv2tsv`,
  83     `datauri`:        `datauri`,
  84     `datauris`:       `datauri`,
  85     `debase64`:       `debase64`,
  86     `unbase64`:       `debase64`,
  87     `dedent`:         `dedent`,
  88     `dedup`:          `dedup`,
  89     `deduplicate`:    `dedup`,
  90     `unique`:         `dedup`,
  91     `drop`:           `drop`,
  92     `dropall`:        `drop`,
  93     `dropend`:        `dropend`,
  94     `dropsuffix`:     `dropend`,
  95     `dropsuffixes`:   `dropend`,
  96     `dropstart`:      `dropstart`,
  97     `dropprefix`:     `dropstart`,
  98     `dropprefixes`:   `dropstart`,
  99     `droptabs`:       `droptabs`,
 100     `each`:           `each`,
 101     `emptyfiles`:     `emptyfiles`,
 102     `emptyfolders`:   `emptyfolders`,
 103     `end`:            `end`,
 104     `endcsv`:         `endcsv`,
 105     `endtsv`:         `endtsv`,
 106     `examples`:       `examples`,
 107     `allfiles`:       `files`,
 108     `files`:          `files`,
 109     `first`:          `first`,
 110     `limit`:          `first`,
 111     `allfolders`:     `folders`,
 112     `folders`:        `folders`,
 113     `calc`:           `frac`,
 114     `frac`:           `frac`,
 115     `fracal`:         `frac`,
 116     `fracalc`:        `frac`,
 117     `glue`:           `glue`,
 118     `gbs`:            `groupbysize`,
 119     `groupbysize`:    `groupbysize`,
 120     `gz`:             `gzip`,
 121     `gzip`:           `gzip`,
 122     `help`:           `help`,
 123     `hex`:            `hex`,
 124     `id3image`:       `id3pic`,
 125     `id3img`:         `id3pic`,
 126     `id3pic`:         `id3pic`,
 127     `id3pict`:        `id3pic`,
 128     `id3picture`:     `id3pic`,
 129     `mp3image`:       `id3pic`,
 130     `mp3img`:         `id3pic`,
 131     `mp3pic`:         `id3pic`,
 132     `mp3pict`:        `id3pic`,
 133     `mp3picture`:     `id3pic`,
 134     `indent`:         `indent`,
 135     `join`:           `join`,
 136     `j0`:             `json0`,
 137     `jl`:             `jsonl`,
 138     `json0`:          `json0`,
 139     `jsonl`:          `jsonl`,
 140     `jsonlines`:      `jsonl`,
 141     `junk`:           `junk`,
 142     `last`:           `last`,
 143     `lines`:          `lines`,
 144     `unixify`:        `lines`,
 145     `lineup`:         `lineup`,
 146     `hyperlinks`:     `links`,
 147     `links`:          `links`,
 148     `countloc`:       `loc`,
 149     `loc`:            `loc`,
 150     `lower`:          `lower`,
 151     `lowercase`:      `lower`,
 152     `matchpar`:       `matchpara`,
 153     `matchpara`:      `matchpara`,
 154     `matchparagraph`: `matchpara`,
 155     `mpar`:           `matchpara`,
 156     `mpara`:          `matchpara`,
 157     `paramatch`:      `matchpara`,
 158     `parmatch`:       `matchpara`,
 159     `pmatch`:         `matchpara`,
 160     `md5`:            `md5`,
 161     `md5hex`:         `md5`,
 162     `md5sum`:         `md5`,
 163     `missing`:        `missing`,
 164     `mumble`:         `mumble`,
 165     `n`:              `n`,
 166     `droppun`:        `nopun`,
 167     `droppunc`:       `nopun`,
 168     `droppunct`:      `nopun`,
 169     `nopun`:          `nopun`,
 170     `nopunc`:         `nopun`,
 171     `nopunct`:        `nopun`,
 172     `now`:            `now`,
 173     `nil`:            `null`,
 174     `nothing`:        `null`,
 175     `null`:           `null`,
 176     `numbers`:        `numbers`,
 177     `open`:           `open`,
 178     `destyle`:        `plain`,
 179     `plain`:          `plain`,
 180     `plaintext`:      `plain`,
 181     `unstyle`:        `plain`,
 182     `primes`:         `primes`,
 183     `reflow`:         `reflow`,
 184     `reprose`:        `reflow`,
 185     `runes`:          `runes`,
 186     `sbs`:            `sbs`,
 187     `sidebyside`:     `sbs`,
 188     `sha1`:           `sha1`,
 189     `sha1hex`:        `sha1`,
 190     `sha1sum`:        `sha1`,
 191     `sha256`:         `sha256`,
 192     `sha256hex`:      `sha256`,
 193     `sha256sum`:      `sha256`,
 194     `sha512`:         `sha512`,
 195     `sha512hex`:      `sha512`,
 196     `sha512sum`:      `sha512`,
 197     `showit`:         `si`,
 198     `si`:             `si`,
 199     `since`:          `since`,
 200     `sincelast`:      `sincelast`,
 201     `butfirst`:       `skip`,
 202     `skip`:           `skip`,
 203     `butlast`:        `skiplast`,
 204     `skipfirst`:      `skip`,
 205     `skiplast`:       `skiplast`,
 206     `skipempty`:      `skipempty`,
 207     `smallfiles`:     `smallfiles`,
 208     `squeeze`:        `squeeze`,
 209     `ssv`:            `ssv`,
 210     `fields2tsv`:     `ssv2tsv`,
 211     `ssv2tsv`:        `ssv2tsv`,
 212     `fieldstotsv`:    `ssv2tsv`,
 213     `ssvtotsv`:       `ssv2tsv`,
 214     `stomp`:          `stomp`,
 215     `strings`:        `strings`,
 216     `symbol`:         `symbols`,
 217     `symbols`:        `symbols`,
 218     `tally`:          `tally`,
 219     `tldr`:           `tldr`,
 220     `today`:          `today`,
 221     `topfiles`:       `topfiles`,
 222     `topfolders`:     `topfolders`,
 223     `strip`:          `trim`,
 224     `stripend`:       `trimtrail`,
 225     `stripright`:     `trimtrail`,
 226     `striptrail`:     `trimtrail`,
 227     `striptrails`:    `trimtrail`,
 228     `trim`:           `trim`,
 229     `trimend`:        `trimtrail`,
 230     `trimright`:      `trimtrail`,
 231     `trimtrail`:      `trimtrail`,
 232     `trimtrails`:     `trimtrail`,
 233     `trunc`:          `truncate`,
 234     `truncate`:       `truncate`,
 235     `tsv`:            `tsv`,
 236     `debz`:           `unbzip`,
 237     `debz2`:          `unbzip`,
 238     `debzip`:         `unbzip`,
 239     `debzip2`:        `unbzip`,
 240     `unbz`:           `unbzip`,
 241     `unbz2`:          `unbzip`,
 242     `unbzip`:         `unbzip`,
 243     `unbzip2`:        `unbzip`,
 244     `degz`:           `ungzip`,
 245     `degzip`:         `ungzip`,
 246     `ungz`:           `ungzip`,
 247     `ungzip`:         `ungzip`,
 248     `dehex`:          `unhex`,
 249     `unhex`:          `unhex`,
 250     `until`:          `until`,
 251     `upto`:           `until`,
 252     `untillast`:      `untillast`,
 253     `uptolast`:       `untillast`,
 254     `uri`:            `uriencode`,
 255     `uriencode`:      `uriencode`,
 256     `utf8`:           `utf8`,
 257     `vulgarize`:      `vulgarize`,
 258     `with`:           `with`,
 259     `ymd`:            `ymd`,
 260 }
 261 
// name2tool turns canonical names into callable values; table toolNameAliases
// adapts many dashless lowercased names into keys/names for this table.
//
// Values are either funcs whose exact type is one of those handled by func
// dispatchFunc, or fallbackIntTool values. The entries set to nil (`compose`,
// `each`, `mumble`, and `with`) are placeholders: func addMetaTools replaces
// them at runtime, since those tools refer back to this table and the
// compiler rejects such circular references in the table literal itself.
var name2tool = map[string]any{
    `after`:        after,
    `afterlast`:    afterLast,
    `aliases`:      aliases,
    `base64`:       toBase64,
    `before`:       before,
    `beforelast`:   beforeLast,
    `begin`:        begin,
    `begincsv`:     beginCSV,
    `begintsv`:     beginTSV,
    `bigfiles`:     bigFiles,
    `blow`:         fallbackIntTool{blowTabs, 4},
    `book`:         book,
    `breathe`:      fallbackIntTool{breathe, 5},
    `bytes`:        bytesTool,
    `choplf`:       chopLF,
    `common`:       common,
    `compose`:      nil, // set by func addMetaTools
    `countbytes`:   coby,
    `crlf`:         crlf,
    `csv`:          csv2lines,
    `csv2tsv`:      csv2tsv,
    `datauri`:      dataURI,
    `debase64`:     debase64,
    `dedent`:       dedent,
    `dedup`:        dedup,
    `drop`:         drop,
    `dropend`:      dropEnd,
    `dropstart`:    dropStart,
    `droptabs`:     dropTabs,
    `each`:         nil, // set by func addMetaTools
    `emptyfiles`:   emptyFiles,
    `emptyfolders`: emptyFolders,
    `end`:          end,
    `endcsv`:       endCSV,
    `endtsv`:       endTSV,
    `examples`:     showExamples,
    `files`:        allFiles,
    `first`:        fallbackIntTool{first, 1},
    `folders`:      allFolders,
    `frac`:         fractions,
    `glue`:         glue,
    `groupbysize`:  groupByFileSize,
    `gzip`:         gzipBytes,
    `help`:         showHelp,
    `hex`:          toHex,
    `id3pic`:       id3Picture,
    `indent`:       indent,
    `join`:         join,
    `json0`:        json0,
    `jsonl`:        jsonl,
    `junk`:         fallbackIntTool{junk, 1_024},
    `last`:         fallbackIntTool{last, 1},
    `lines`:        lines,
    `lineup`:       fallbackIntTool{lineUp, 0},
    `links`:        links,
    `loc`:          locTool,
    `lower`:        lower,
    `matchpara`:    matchParagraphsFold,
    `md5`:          md5Hex,
    `missing`:      missing,
    `mumble`:       nil, // set by func addMetaTools
    `n`:            fallbackIntTool{numberLines, 1},
    `nopun`:        dropPunctuation,
    `now`:          now,
    `null`:         null,
    `numbers`:      numbers,
    `open`:         openTool,
    `plain`:        plain,
    `primes`:       fallbackIntTool{primes, 1_000_000},
    `reflow`:       reflow,
    `runes`:        runes,
    `sbs`:          fallbackIntTool{sbs, 0},
    `sha1`:         sha1Hex,
    `sha256`:       sha256Hex,
    `sha512`:       sha512Hex,
    `si`:           si,
    `since`:        since,
    `sincelast`:    sinceLast,
    `skip`:         skip,
    `skipempty`:    skipEmpty,
    `skiplast`:     fallbackIntTool{skipLast, 1},
    `smallfiles`:   smallFiles,
    `squeeze`:      squeeze,
    `ssv`:          ssv2lines,
    `ssv2tsv`:      ssv2tsv,
    `stomp`:        stomp,
    `strings`:      stringsTool,
    `symbols`:      showSymbols,
    `tally`:        tally,
    `today`:        today,
    `tldr`:         showTLDR,
    `topfiles`:     topFiles,
    `topfolders`:   topFolders,
    `trim`:         trimSpaces,
    `trimtrail`:    trimTrail,
    `truncate`:     truncateRunes,
    `tsv`:          tsv2lines,
    `unbzip`:       debzip2,
    `ungzip`:       degzip,
    `unhex`:        unHex,
    `until`:        until,
    `untillast`:    untilLast,
    `uriencode`:    uriEncode,
    `utf8`:         toUTF8,
    `vulgarize`:    vulgarize,
    `with`:         nil, // set by func addMetaTools
    `ymd`:          ymd,
}
 373 
 374 func addMetaTools() {
 375     // enable special tools which circularly-refer to the tool-lookup table;
 376     // the compiler complains if these are setup in the lookup-table itself
 377     name2tool[`compose`] = compose
 378     name2tool[`each`] = each
 379     name2tool[`mumble`] = mumble
 380     name2tool[`with`] = runWith
 381 }
 382 
 383 func each(w *bufio.Writer, r io.Reader, args []string) error {
 384     if len(args) < 1 {
 385         return errors.New(`expected at least 1 arg, but was given none`)
 386     }
 387 
 388     tool := args[0]
 389     pars := make([]string, 0, len(args))
 390     pars = append(pars, args[1:]...)
 391     return loopLinesString(r, func(line string) error {
 392         return run(w, r, tool, append(pars, line))
 393     })
 394 }
 395 
 396 func mumble(w *bufio.Writer, r io.Reader, args []string) error {
 397     if len(args) < 1 {
 398         return errors.New(`expected at least 1 arg, but was given none`)
 399     }
 400 
 401     for i, s := range args {
 402         if i > 0 {
 403             w.WriteByte(' ')
 404         }
 405         w.WriteString(s)
 406     }
 407     w.WriteByte('\t')
 408     return run(w, r, args[0], args[1:])
 409 }
 410 
 411 // run handles lookup and dispatch for all tools in this app
 412 func run(w io.Writer, r io.Reader, tool string, args []string) error {
 413     key := strings.TrimSpace(tool)
 414 
 415     // handle custom aliases for the `compose` tool
 416     switch key {
 417     case `-`, `--`:
 418         bw := bufio.NewWriterSize(w, bufferSize)
 419         defer bw.Flush()
 420         return compose(bw, r, args)
 421     }
 422 
 423     key = strings.ToLower(key)
 424     key = strings.ReplaceAll(key, `-`, ``)
 425     key = strings.ReplaceAll(key, `_`, ``)
 426     if alias, ok := toolNameAliases[key]; ok {
 427         key = alias
 428     }
 429 
 430     if key == `` {
 431         return errors.New(`no tool-name given`)
 432     }
 433 
 434     fn, ok := name2tool[key]
 435     if !ok {
 436         return fmt.Errorf(`no tool named %q`, tool)
 437     }
 438 
 439     bw := bufio.NewWriterSize(w, bufferSize)
 440     defer bw.Flush()
 441 
 442     switch len(args) {
 443     case 1:
 444         switch fn.(type) {
 445         case
 446             func(line []byte) []byte,
 447             func(w *bufio.Writer, r io.Reader) error,
 448             func(w *bufio.Writer, line []byte) error,
 449             func(w *bufio.Writer, line string) error:
 450             return handleFile(args[0], func(r io.Reader) error {
 451                 return dispatchFunc(bw, r, fn, nil)
 452             })
 453         }
 454 
 455     case 2:
 456         switch fn.(type) {
 457         case
 458             func(line []byte, arg []byte) []byte,
 459             func(line []byte, n int) []byte,
 460             func(w *bufio.Writer, r io.Reader, n int) error:
 461             path := args[len(args)-1]
 462             rest := args[:len(args)-1]
 463             return handleFile(path, func(r io.Reader) error {
 464                 return dispatchFunc(bw, r, fn, rest)
 465             })
 466         }
 467     }
 468 
 469     err := dispatchFunc(bw, r, fn, args)
 470     if err == errNoMoreOutput {
 471         // deliberately quit the app successfully and right away
 472         return nil
 473     }
 474 
 475     if err != nil {
 476         return fmt.Errorf(`%s: %w`, tool, err)
 477     }
 478     return nil
 479 }
 480 
 481 // runWith implements tool `with`
 482 func runWith(w *bufio.Writer, r io.Reader, args []string) error {
 483     if len(args) < 2 {
 484         return fmt.Errorf(`expected at least 2 args, but was given %d`, len(args))
 485     }
 486 
 487     path := args[0]
 488     tool := args[1]
 489     args = args[2:]
 490 
 491     return handleFile(path, func(r io.Reader) error {
 492         return run(w, r, tool, args)
 493     })
 494 }
 495 
 496 // unsupportedFuncType is a custom error type which enables unit-tests to
 497 // automatically check if all entries in the func-dispatch table are callable
 498 type unsupportedFuncType struct {
 499     fn any
 500 }
 501 
 502 // Error implements the error interface
 503 func (uft unsupportedFuncType) Error() string {
 504     return fmt.Sprintf(`unsupported func type %T`, uft.fn)
 505 }
 506 
 507 // dispatchFunc is used by func run, making the latter more readable
 508 func dispatchFunc(w *bufio.Writer, r io.Reader, fn any, args []string) error {
 509     switch tool := fn.(type) {
 510     case func(line []byte) []byte:
 511         return loopLines(r, func(line []byte) error {
 512             w.Write(tool(line))
 513             return endLine(w)
 514         })
 515 
 516     case func(line string) string:
 517         return loopLinesString(r, func(line string) error {
 518             w.WriteString(tool(line))
 519             return endLine(w)
 520         })
 521 
 522     case func(line []byte, arg []byte) []byte:
 523         if len(args) != 1 {
 524             return fmt.Errorf(`expected 1 arg, but was given %d`, len(args))
 525         }
 526         x := []byte(args[0])
 527         return loopLines(r, func(line []byte) error {
 528             w.Write(tool(line, x))
 529             return endLine(w)
 530         })
 531 
 532     case func(line []byte, n int) []byte:
 533         n, err := demandIntegerArg(args)
 534         if err != nil {
 535             return err
 536         }
 537         return loopLines(r, func(line []byte) error {
 538             w.Write(tool(line, n))
 539             return endLine(w)
 540         })
 541 
 542     case func(w *bufio.Writer) error:
 543         return tool(w)
 544 
 545     case func(w *bufio.Writer, args []string) error:
 546         return tool(w, args)
 547 
 548     case func(w *bufio.Writer, r io.Reader) error:
 549         return tool(w, r)
 550 
 551     case func(w *bufio.Writer, r io.Reader, args []string) error:
 552         return tool(w, r, args)
 553 
 554     case func(w *bufio.Writer, line []byte) error:
 555         if len(args) != 0 {
 556             return fmt.Errorf(`expected no args, but was given %d`, len(args))
 557         }
 558         return loopLines(r, func(line []byte) error {
 559             return tool(w, line)
 560         })
 561 
 562     case func(w *bufio.Writer, line string) error:
 563         if len(args) != 0 {
 564             return fmt.Errorf(`expected no args, but was given %d`, len(args))
 565         }
 566         return loopLinesString(r, func(line string) error {
 567             return tool(w, line)
 568         })
 569 
 570     case func(w *bufio.Writer, r io.Reader, n int) error:
 571         n, err := demandIntegerArg(args)
 572         if err != nil {
 573             return err
 574         }
 575         return tool(w, r, n)
 576 
 577     case fallbackIntTool:
 578         return tool.run(w, r, args)
 579 
 580     default:
 581         return unsupportedFuncType{tool}
 582     }
 583 }
 584 
 585 // demandIntegerArg helps various tools handle a single non-optional int arg
 586 func demandIntegerArg(args []string) (int, error) {
 587     if len(args) != 1 {
 588         return 0, fmt.Errorf(`expected 1 argument, but was given %d`, len(args))
 589     }
 590 
 591     n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``))
 592     if err != nil {
 593         const fs = `expected 1 integer argument, but was given %s`
 594         return n, fmt.Errorf(fs, args[0])
 595     }
 596     return n, nil
 597 }
 598 
 599 // optionalIntegerArg helps various tools handle a single optional int arg
 600 // func optionalIntegerArg(args []string, fallback int) (int, error) {
 601 //  if len(args) == 0 {
 602 //      return fallback, nil
 603 //  }
 604 
 605 //  if len(args) > 1 {
 606 //      const fs = `expected at most 1 argument, but was given %d`
 607 //      return fallback, fmt.Errorf(fs, len(args))
 608 //  }
 609 
 610 //  n, err := strconv.Atoi(strings.ReplaceAll(args[0], `_`, ``))
 611 //  if err != nil {
 612 //      const fs = `expected at most 1 integer argument, but was given %s`
 613 //      return n, fmt.Errorf(fs, args[0])
 614 //  }
 615 //  return n, nil
 616 // }
 617 
 618 // optionalIntegerArg helps various tools handle a single optional int arg
 619 func optionalStringArg(args []string, fallback string) (string, error) {
 620     if len(args) == 0 {
 621         return fallback, nil
 622     }
 623 
 624     if len(args) > 1 {
 625         const fs = `expected at most 1 argument, but was given %d`
 626         return fallback, fmt.Errorf(fs, len(args))
 627     }
 628 
 629     return fallback, nil
 630 }
 631 
 632 // compose runs a chain of commands asynchronously, but still keeping their
 633 // implied I/O order
 634 func compose(w *bufio.Writer, r io.Reader, rest []string) error {
 635     cmds := splitSliceNonEmpty(rest, `--`)
 636     return composeAsyncRec(w, r, cmds)
 637 }
 638 
// composeAsyncRec handles the recursion for func compose: the first command
// in the chain given reads from r and writes into a pipe, which the rest of
// the chain reads from by way of a recursive call; the last command in the
// chain writes directly to w. An empty chain does nothing and succeeds.
//
// The code to merge the error-channels looks slightly `simplifiable`, but
// trying to do so can lead to ugly concurrency bugs; things seem to work,
// so keep as is
func composeAsyncRec(w io.Writer, r io.Reader, cmds [][]string) error {
    if len(cmds) == 0 {
        return nil
    }

    // check, even if func splitSliceNonEmpty is supposed to prevent this
    if len(cmds[0]) == 0 {
        return errors.New(`internal error: unexpected empty command-slice`)
    }
    tool := cmds[0][0]
    args := cmds[0][1:]

    // handle the last subcommand/tool in the chain
    if len(cmds) == 1 {
        return run(w, r, tool, args)
    }

    // handle the steps along the way, gathering a single error result
    errch := make(chan error)
    defer close(errch)

    go func() {
        // the current task writes into curpipe, while all later tasks
        // read what it wrote from nextpipe
        nextpipe, curpipe := io.Pipe()

        curerrch := make(chan error)
        defer close(curerrch)
        nexterrch := make(chan error)
        defer close(nexterrch)

        // start the current task asynchronously
        go func() {
            // directly using io.Pipe can lead to an astonishing number of
            // empty/tiny byte-slices being passed around channels, which
            // slows things down considerably when dealing with many data
            w := bufio.NewWriterSize(curpipe, bufferSize)

            // ensure clean-up in case current tool panics
            defer curpipe.Close()
            defer w.Flush()

            // make sequence of all steps explicit, to ensure things are
            // happening in the correct order
            err := run(w, r, tool, args)
            w.Flush()
            curpipe.Close()
            curerrch <- err
        }()

        // start all later tasks asynchronously, by way of recursion
        go func() {
            // ensure clean-up in case later tools panic
            defer nextpipe.Close()

            // make sequence of all steps explicit; note w here is the
            // func's own writer, not the pipe-writer from the current task
            err := composeAsyncRec(w, nextpipe, cmds[1:])
            nextpipe.Close()
            nexterrch <- err
        }()

        // wait for completion of all tasks, in any order: this is done
        // by waiting for 2 tasks, since the latter of these handles all
        // later tasks, by way of recursion; both channels are always
        // received from exactly once before this func ends

        select {
        case err := <-curerrch:
            if adaptWriteError(err) != nil {
                // wait for the later tasks to end, ignoring their error
                <-nexterrch

                // return error from the current task
                errch <- err
                return
            }

            // wait for later tasks to end, and return their error, if any
            errch <- <-nexterrch
            return

        case err := <-nexterrch:
            // try to explicitly end the current task sooner; multiple
            // closures of io.Pipe `values` are allowed: their Close
            // funcs do nothing when called after the first time
            curpipe.Close()

            if adaptWriteError(err) != nil {
                // wait for current task to end, ignoring its error
                <-curerrch

                // return error from later tasks
                errch <- err
                return
            }

            // wait for current task to end, and return its error, if any
            errch <- <-curerrch
            return
        }
    }()

    // wait for a definitive error/result from the async tasks
    return <-errch
}

     File: tu/running_test.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "errors"
   7     "io"
   8     "testing"
   9 )
  10 
  11 func TestToolAliases(t *testing.T) {
  12     for k, v := range toolNameAliases {
  13         t.Run(k, func(t *testing.T) {
  14             if !isValidTableString(k) {
  15                 t.Fatalf(`unexpected string %q in table`, k)
  16                 return
  17             }
  18 
  19             if !isValidTableString(v) {
  20                 t.Fatalf(`unexpected string %q in table`, v)
  21                 return
  22             }
  23 
  24             if _, ok := name2tool[v]; !ok {
  25                 t.Fatalf(`alias %q doesn't lead to a func`, k)
  26                 return
  27             }
  28         })
  29     }
  30 
  31     for k, v := range toolNameAliases {
  32         t.Run(k, func(t *testing.T) {
  33             if !isValidTableString(k) {
  34                 t.Fatalf(`unexpected string %q in table`, k)
  35                 return
  36             }
  37 
  38             if _, ok := toolNameAliases[v]; !ok {
  39                 t.Fatalf(`no self-alias for %q`, v)
  40                 return
  41             }
  42         })
  43     }
  44 }
  45 
  46 func isValidTableString(s string) bool {
  47     for _, r := range s {
  48         if 'a' <= r && r <= 'z' {
  49             continue
  50         }
  51         if '0' <= r && r <= '9' {
  52             continue
  53         }
  54 
  55         return false
  56     }
  57 
  58     return true
  59 }
  60 
  61 func TestToolTable(t *testing.T) {
  62     addMetaTools()
  63 
  64     for name, tool := range name2tool {
  65         // avoid some of the more `interactive` tools, and tools which
  66         // visibly open other apps/files
  67         switch name {
  68         case `si`:
  69             continue
  70         }
  71 
  72         t.Run(name, func(t *testing.T) {
  73             w := bufio.NewWriter(io.Discard)
  74             err := dispatchFunc(w, bytes.NewReader(nil), tool, nil)
  75             if errors.As(err, &unsupportedFuncType{}) {
  76                 t.Fatal(err.Error())
  77                 return
  78             }
  79 
  80             if _, ok := toolNameAliases[name]; !ok {
  81                 t.Fatalf(`no self-alias for %q`, name)
  82                 return
  83             }
  84         })
  85     }
  86 }

     File: tu/sbs.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "errors"
   7     "io"
   8     "math"
   9     "strings"
  10     "unicode/utf8"
  11 )
  12 
// settings shared by the side-by-side layout funcs
const (
    // sbsTabStop is the space-count used for tab-expansion: funcs book and
    // sbs pass it to func sbsExpand for all lines which have tabs in them
    sbsTabStop = 4

    // sbsSeparator is the string put between adjacent columns
    sbsSeparator = ` █ `

    // sbsMaxAutoWidth is the output max-width allowed when auto-picking a
    // column-count; chosen to fit very old monitors
    sbsMaxAutoWidth = 79
)
  24 
  25 // book lays out input lines side-by-side like in a book
  26 func book(w *bufio.Writer, r io.Reader, height int) error {
  27     if height < 2 {
  28         return errors.New(`page-height can't be less than 2`)
  29     }
  30 
  31     c := 0
  32     leftw := 0
  33     rightw := 0
  34 
  35     step := height - 1
  36     var left, right [][]byte
  37 
  38     err := loopLines(r, func(line []byte) error {
  39         i := c
  40         c = (c + 1) % (2 * step)
  41 
  42         // expand all tabs, to avoid any later ambiguity about alignments
  43         if bytes.IndexByte(line, '\t') >= 0 {
  44             var exp bytes.Buffer
  45             sbsExpand(line, sbsTabStop, &exp)
  46             line = exp.Bytes()
  47         } else {
  48             line = bytes.Clone(line)
  49         }
  50 
  51         n := unstyledWidth(line)
  52 
  53         if i < step {
  54             if leftw < n {
  55                 leftw = n
  56             }
  57             left = append(left, line)
  58             return nil
  59         }
  60 
  61         if rightw < n {
  62             rightw = n
  63         }
  64         right = append(right, line)
  65         return nil
  66     })
  67 
  68     if err != nil {
  69         return err
  70     }
  71 
  72     if len(right) == 0 {
  73         for _, s := range left {
  74             w.Write(s)
  75             if err := endLine(w); err != nil {
  76                 return err
  77             }
  78         }
  79         return nil
  80     }
  81 
  82     const sep = ` █ `
  83     dots := strings.Repeat(`·`, leftw+utf8.RuneCountInString(sep)+rightw)
  84 
  85     n := len(left)
  86     if n < len(right) {
  87         n = len(right)
  88     }
  89 
  90     for i := 0; i < n; i++ {
  91         if i%step == 0 && i > 0 {
  92             w.WriteString(dots)
  93             endLine(w)
  94         }
  95 
  96         x := safeIndex(left, i)
  97         w.Write(x)
  98         writeSpaces(w, leftw-unstyledWidth(x))
  99         w.WriteString(sep)
 100         w.Write(safeIndex(right, i))
 101 
 102         err := endLine(w)
 103         if err != nil {
 104             return err
 105         }
 106     }
 107 
 108     return nil
 109 }
 110 
 111 func safeIndex(items [][]byte, i int) []byte {
 112     if i < len(items) {
 113         return items[i]
 114     }
 115     return nil
 116 }
 117 
 118 func sbs(w *bufio.Writer, r io.Reader, ncols int) error {
 119     var lines [][]byte
 120     err := loopLines(r, func(line []byte) error {
 121         // expand all tabs, to avoid any later ambiguity about alignments
 122         if bytes.IndexByte(line, '\t') >= 0 {
 123             var exp bytes.Buffer
 124             sbsExpand(line, sbsTabStop, &exp)
 125             lines = append(lines, exp.Bytes())
 126             return nil
 127         }
 128 
 129         lines = append(lines, bytes.Clone(line))
 130         return nil
 131     })
 132 
 133     if err != nil {
 134         return err
 135     }
 136 
 137     // choose a default number of columns, if not given a positive one
 138     if ncols < 1 {
 139         ncols = sbsChooseNumColumns(lines)
 140     }
 141     return sideBySide(w, lines, ncols)
 142 }
 143 
 144 // handleLines handles the use-case of showing/rearranging lines from a
 145 // single input source (presumably standard input) into several columns
 146 func sideBySide(w *bufio.Writer, lines [][]byte, ncols int) error {
 147     if ncols < 1 {
 148         return nil
 149     }
 150 
 151     if ncols == 1 {
 152         for _, s := range lines {
 153             w.Write(s)
 154             err := w.WriteByte('\n')
 155             if err != nil {
 156                 // assume error probably results from a closed stdout
 157                 // pipe, so quit the app right away without complaining
 158                 return err
 159             }
 160         }
 161         return nil
 162     }
 163 
 164     // nothing to show, so don't even bother
 165     if len(lines) == 0 {
 166         return nil
 167     }
 168 
 169     cols, height := splitLines(lines, ncols)
 170     widths := make([]int, 0, len(cols))
 171     for _, c := range cols {
 172         // find the max width of all lines of the current column
 173         maxw := 0
 174         for _, v := range c {
 175             w := width(v)
 176             if w > maxw {
 177                 maxw = w
 178             }
 179         }
 180 
 181         widths = append(widths, maxw)
 182     }
 183 
 184     // endSep is right-trimmed to avoid unneeded trailing spaces on output
 185     // lines whose last column is an empty/missing input line
 186     endSep := strings.TrimRight(sbsSeparator, ` `)
 187 
 188     // show columns side by side
 189     for r := 0; r < height; r++ {
 190         for c := 0; c < len(cols); c++ {
 191             badr := r >= len(cols[c])
 192 
 193             // clearly separate columns visually
 194             if c > 0 {
 195                 if c == len(cols)-1 && (badr || len(cols[c][r]) == 0) {
 196                     // avoid unneeded trailing spaces
 197                     w.WriteString(endSep)
 198                 } else {
 199                     w.WriteString(sbsSeparator)
 200                 }
 201             }
 202 
 203             if badr {
 204                 // exceeding items for this (last) column
 205                 continue
 206             }
 207 
 208             // pad all columns, except the last
 209             width := 0
 210             if c < len(cols)-1 {
 211                 width = widths[c]
 212             }
 213 
 214             // emit maybe-padded column
 215             writeItem(w, cols[c][r], width)
 216         }
 217 
 218         // end the line
 219         err := w.WriteByte('\n')
 220         if err != nil {
 221             // probably a pipe was closed
 222             return nil
 223         }
 224     }
 225 
 226     return nil
 227 }
 228 
 229 // sbsExpand replaces all tabs with correctly-padded tabstops, turning all tabs
 230 // each into 1 or more spaces, as appropriate
 231 func sbsExpand(s []byte, tabstop int, sb *bytes.Buffer) {
 232     sb.Reset()
 233     numrunes := 0
 234 
 235     for _, b := range s {
 236         switch b {
 237         case '\t':
 238             numspaces := tabstop - numrunes%tabstop
 239             for i := 0; i < numspaces; i++ {
 240                 sb.WriteRune(' ')
 241             }
 242             numrunes += numspaces
 243 
 244         default:
 245             sb.WriteByte(b)
 246             numrunes++
 247         }
 248     }
 249 }
 250 
 251 // width calculates visually-correct string widths
 252 func width(s []byte) int {
 253     return utf8.RuneCount(s) - ansiLength(s)
 254 }
 255 
 256 // ansiLength calculates how many bytes ANSI-codes take in the string given:
 257 // func width uses this to calculate visually-correct string widths
 258 func ansiLength(s []byte) int {
 259     n := 0
 260     var prev byte
 261     ansi := false
 262 
 263     for _, r := range s {
 264         if ansi {
 265             n++
 266         }
 267 
 268         if ansi && r == 'm' {
 269             ansi = false
 270             continue
 271         }
 272 
 273         if prev == '\x1b' && r == '[' {
 274             n += 2 // count the 2-item starter-sequence `\x1b[`
 275             ansi = true
 276         }
 277         prev = r
 278     }
 279 
 280     return n
 281 }
 282 
 283 // splitLines turns an array of lines into sub-arrays of lines, so they can
 284 // be shown side by side later on
 285 func splitLines(lines [][]byte, ncols int) (cols [][][]byte, maxheight int) {
 286     n := ncols
 287     hfrac := float64(len(lines)) / float64(n)
 288     h := int(math.Ceil(hfrac))
 289 
 290     cols = make([][][]byte, 0, n)
 291     for len(lines) > h {
 292         cols = append(cols, lines[:h])
 293         lines = lines[h:]
 294     }
 295     if len(lines) != 0 {
 296         cols = append(cols, lines)
 297     }
 298     return cols, h
 299 }
 300 
 301 // padWrite emits the string given, following it with spaces to fill the
 302 // width given if string is shorter than that
 303 func padWrite(w *bufio.Writer, s []byte, n int) {
 304     w.Write(s)
 305     writeSpaces(w, n-width(s))
 306 }
 307 
 308 // writeItem emits the string given, followed by any padding needed, as well
 309 // as ANSI-style clearing, again if needed
 310 func writeItem(w *bufio.Writer, s []byte, width int) {
 311     padWrite(w, s, width)
 312     if needsStyleReset(s) {
 313         w.WriteString("\x1b[0m")
 314     }
 315 }
 316 
 317 func needsStyleReset(s []byte) bool {
 318     return true &&
 319         bytes.Contains(s, []byte{'\x1b', '['}) &&
 320         !bytes.HasSuffix(s, []byte{'\x1b', '[', '0', 'm'})
 321 }
 322 
 323 // sbsChooseNumColumns implements heuristics to auto-pick the number of columns
 324 // to show: this func is used when the app is using data from standard-input
 325 func sbsChooseNumColumns(lines [][]byte) int {
 326     if len(lines) == 0 {
 327         return 1
 328     }
 329 
 330     // sepw is the separator width
 331     sepw := utf8.RuneCountInString(sbsSeparator)
 332 
 333     // see if lines can even fit a single column
 334     if !sbsColumnsCanFit(1, lines, sepw) {
 335         return 1
 336     }
 337 
 338     // starting from the max possible columns which may fit, keep trying
 339     // with 1 fewer column, until the columns fit
 340     for ncols := int(sbsMaxAutoWidth / sepw); ncols > 1; ncols-- {
 341         if sbsColumnsCanFit(ncols, lines, sepw) {
 342             // success: found the most columns which fit
 343             return ncols
 344         }
 345     }
 346 
 347     // avoid multiple columns if some lines are too wide
 348     return 1
 349 }
 350 
 351 // sbsColumnsCanFit checks whether the number of columns given would fit the
 352 // display max-width constant
 353 func sbsColumnsCanFit(ncols int, lines [][]byte, gap int) bool {
 354     if ncols < 1 {
 355         // avoid surprises when called with non-sense column counts
 356         return true
 357     }
 358 
 359     // stack-allocate the backing-array behind slice maxw
 360     var buf [sbsMaxAutoWidth / 2]int
 361     maxw := buf[:0]
 362 
 363     // find the column max-height, to chunk lines into columns
 364     h := int(math.Ceil(float64(len(lines)) / float64(ncols)))
 365 
 366     // find column max-width by looping over chunks of lines
 367     for len(lines) >= h {
 368         w := findMaxWidth(lines[:h])
 369         maxw = append(maxw, w)
 370         lines = lines[h:]
 371     }
 372 
 373     // don't forget the last column
 374     if len(lines) > 0 {
 375         w := findMaxWidth(lines)
 376         maxw = append(maxw, w)
 377     }
 378 
 379     // remember to add the gaps/separators between columns, along with
 380     // all the individual column max-widths
 381     w := (ncols - 1) * gap
 382     for _, n := range maxw {
 383         w += n
 384     }
 385 
 386     // do the columns fit?
 387     return w <= sbsMaxAutoWidth
 388 }
 389 
 390 // findMaxWidth finds the max width in the slice given, ignoring ANSI codes
 391 func findMaxWidth(lines [][]byte) int {
 392     maxw := 0
 393     for _, s := range lines {
 394         w := width(s)
 395         if w > maxw {
 396             maxw = w
 397         }
 398     }
 399     return maxw
 400 }

     File: tu/si.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "encoding/base64"
   7     "errors"
   8     "fmt"
   9     "io"
  10     "net"
  11     "strings"
  12 )
  13 
// these constants are the HTML surrounding the data-URIs for media types
// which are wrapped in a webpage of their own: each `before` string ends
// inside a still-open src attribute, while each `after` string starts by
// closing that attribute, so the URI is meant to go right between the two
const (
    // beforeAudio starts HTML webpage with just an audio player; the page
    // is left unfinished, right at the start of the player's src attribute
    beforeAudio = `<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <link rel="icon" href="data:,">
  <title>wave sound</title>
  <style>
    body { margin: 2rem auto; width: 90vw; }
    audio { margin: auto; width: 100%; }
  </style>
</head>
<body>
  <audio controls autofocus src="`

    // beforeAutoplayAudio starts HTML webpage with just an audio player
    // in autoplay mode; unlike with beforeAudio, most lines in this string
    // start with extra spaces, which are part of the webpage emitted
    beforeAutoplayAudio = `<!DOCTYPE html>
    <html>
    <head>
      <meta charset="UTF-8">
      <link rel="icon" href="data:,">
      <title>wave sound</title>
      <style>
        body { margin: 2rem auto; width: 90vw; }
        audio { margin: auto; width: 100%; }
      </style>
    </head>
    <body>
      <audio controls autofocus autoplay src="`

    // afterAudio ends HTML webpage with just an audio player: it closes
    // the src attribute left open by either of the audio starters
    afterAudio = "\"></audio>\n</body>\n</html>\n"

    // beforeBitmap starts HTML webpage with just an image; the page is
    // left unfinished, right at the start of the image's src attribute
    beforeBitmap = `<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <link rel="icon" href="data:,">
  <title>bitmap image</title>
  <style>
    body { margin: 0.5rem auto; width: 90vw; }
    img { margin: auto; width: 100%; }
  </style>
</head>
<body>
  <img src="`

    // afterBitmap ends HTML webpage with just an image: it closes the src
    // attribute left open by beforeBitmap
    afterBitmap = "\"></img>\n</body>\n</html>\n"
)
  67 
  68 // si implements the `Show It` tool
  69 func si(w *bufio.Writer, r io.Reader, names []string) error {
  70     if len(names) > 0 {
  71         return openTool(w, r, names)
  72     }
  73 
  74     for _, s := range names {
  75         // handle data-URIs
  76         if strings.HasPrefix(s, `data:`) && strings.Contains(s, `;base64,`) {
  77             if err := popup(s); err != nil {
  78                 return err
  79             }
  80 
  81             if err := siHandleInput(strings.NewReader(s)); err != nil {
  82                 return err
  83             }
  84 
  85             continue
  86         }
  87 
  88         if err := popupName(s); err != nil {
  89             return err
  90         }
  91     }
  92 
  93     if len(names) == 0 {
  94         return siHandleInput(r)
  95     }
  96     return nil
  97 }
  98 
  99 // siHandleInput specifically handles stdin and data-URIs
 100 func siHandleInput(r io.Reader) error {
 101     // before starting the single-request server, try to detect the MIME type
 102     // by inspecting the first bytes of the stream and matching known filetype
 103     // starting patterns
 104     var buf [64]byte
 105     n, err := r.Read(buf[:])
 106     if err != nil && err != io.EOF {
 107         return err
 108     }
 109     start := buf[:n]
 110 
 111     // handle data-URI-like inputs
 112     if bytes.HasPrefix(start, []byte(`data:`)) {
 113         if bytes.Contains(start, []byte(`;base64,`)) {
 114             return siHandleDataURI(start, r)
 115         }
 116     }
 117 
 118     // handle regular data, trying to auto-detect its MIME type using
 119     // its first few bytes
 120     mime, ok := detectMIME(start)
 121     if !ok || len(mime) == 0 {
 122         mime = `text/plain`
 123     }
 124 
 125     // remember to precede the partly-used reader with the starting bytes;
 126     // give a negative/invalid filesize hint, since stream is single-use
 127     const autoplay = true
 128     return serveOnce(start, r, serveConfig{
 129         ContentType:   mime,
 130         ContentLength: -1,
 131         Autoplay:      autoplay,
 132     })
 133 }
 134 
 135 // siHandleDataURI handles data-URIs for func handleInput
 136 func siHandleDataURI(start []byte, r io.Reader) error {
 137     const autoplay = true
 138     if !bytes.HasPrefix(start, []byte(`data:`)) {
 139         return errors.New(`invalid data-URI`)
 140     }
 141 
 142     i := bytes.Index(start, []byte(`;base64,`))
 143     if i < 0 {
 144         return errors.New(`invalid data-URI`)
 145     }
 146 
 147     // force browser to play wave and aiff sounds, instead of
 148     // showing a useless download-file option
 149     switch mime := string(start[len(`data:`):i]); mime {
 150     case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`:
 151         before := beforeAudio
 152         if autoplay {
 153             before = beforeAutoplayAudio
 154         }
 155 
 156         // surround URI-encoded audio data with a web page only having
 157         // a media player in it: this is necessary for wave and aiff
 158         // sounds, since web browsers may insist on a useless download
 159         // option for those media types
 160         r = io.MultiReader(
 161             strings.NewReader(before),
 162             bytes.NewReader(start),
 163             r,
 164             strings.NewReader(afterAudio),
 165         )
 166 
 167         return serveOnce(nil, r, serveConfig{
 168             ContentType:   `text/html; charset=UTF-8`,
 169             ContentLength: -1,
 170             Autoplay:      autoplay,
 171         })
 172 
 173     case `image/bmp`, `audio/x-bmp`:
 174         // surround URI-encoded bitmap data with a web page only having
 175         // an image element in it: this is necessary for bitmap pictures,
 176         // since web browsers may insist on a useless download option for
 177         // that media type
 178         r = io.MultiReader(
 179             strings.NewReader(beforeBitmap),
 180             bytes.NewReader(start),
 181             r,
 182             strings.NewReader(afterBitmap),
 183         )
 184 
 185         return serveOnce(nil, r, serveConfig{
 186             ContentType:   `text/html; charset=UTF-8`,
 187             ContentLength: -1,
 188             Autoplay:      autoplay,
 189         })
 190 
 191     default:
 192         start = start[i+len(`;base64,`):]
 193         r = io.MultiReader(bytes.NewReader(start), r)
 194         dec := base64.NewDecoder(base64.URLEncoding, r)
 195 
 196         // give a negative/invalid filesize hint, since stream is single-use
 197         return serveOnce(nil, dec, serveConfig{
 198             ContentType:   mime,
 199             ContentLength: -1,
 200             Autoplay:      autoplay,
 201         })
 202     }
 203 }
 204 
// serveConfig has all details func serveOnce needs, besides the payload
// itself: func serveOnce hands these over to func respond unchanged
type serveConfig struct {
    // ContentType is the MIME type of what's being served; func respond
    // wraps the payload in a webpage for some audio and bitmap types
    ContentType string

    // ContentLength is the byte-count of what's being served; negative
    // values are ignored, and so is 0, since func writePreludeHTTP only
    // emits the Content-Length header for positive values
    ContentLength int

    // Autoplay autoplays audio/video data from stdin: func respond uses
    // it to pick between the 2 audio-player webpage starters
    Autoplay bool
}
 217 
 218 // serveOnce literally serves a single web request and no more
 219 func serveOnce(start []byte, rest io.Reader, cfg serveConfig) error {
 220     // pick a random port from the currently-available ones
 221     srv, err := net.Listen(`tcp`, `127.0.0.1:0`)
 222     if err != nil {
 223         return err
 224     }
 225     defer srv.Close()
 226 
 227     // open a new browser tab for that localhost port
 228     err = popup(fmt.Sprintf(`http://%s`, srv.Addr().String()))
 229     if err != nil {
 230         return err
 231     }
 232 
 233     // accept first connection: no need for async as the server quits after
 234     // its first response
 235     conn, err := srv.Accept()
 236     if err != nil {
 237         return err
 238     }
 239     defer conn.Close()
 240 
 241     respond(conn, start, rest, cfg)
 242     return nil
 243 }
 244 
 245 // respond reads/ignores all request headers, and then replies with some
 246 // content given, quitting immediately after
 247 func respond(conn net.Conn, start []byte, rest io.Reader, cfg serveConfig) {
 248     // maxbufsize is the max capacity the HTTP-protocol line-scanners are
 249     // allowed to reach
 250     const maxbufsize = 128 * 1024
 251 
 252     sc := bufio.NewScanner(conn)
 253     sc.Buffer(nil, maxbufsize)
 254     for sc.Scan() && sc.Text() != `` {
 255         // ignore all request headers
 256     }
 257 
 258     switch cfg.ContentType {
 259     case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`:
 260         // force browser to play wave and aiff sounds, instead of showing
 261         // a useless download-file option; encode audio bytes as data-URI
 262         // in an intermediate buffer
 263 
 264         writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1)
 265         // emit opening HTML right until <audio controls src="
 266         if cfg.Autoplay {
 267             fmt.Fprint(conn, beforeAutoplayAudio)
 268         } else {
 269             fmt.Fprint(conn, beforeAudio)
 270         }
 271         // emit the data-URI
 272         writeBase64(conn, cfg.ContentType, start, rest)
 273         // emit closing HTML after data-URI audio
 274         fmt.Fprint(conn, afterAudio)
 275         return
 276 
 277     case `image/bmp`, `image/x-bmp`:
 278         // force browser to show bitmap pictures, instead of showing a
 279         // useless download-file option; encode picture bytes as data-URI
 280         // in an intermediate buffer
 281 
 282         writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1)
 283         // emit opening HTML right until <img src="
 284         fmt.Fprint(conn, beforeBitmap)
 285         // emit the data-URI
 286         writeBase64(conn, cfg.ContentType, start, rest)
 287         // emit closing HTML after data-URI image
 288         fmt.Fprint(conn, afterBitmap)
 289         return
 290 
 291     default:
 292         writePreludeHTTP(conn, cfg.ContentType, cfg.ContentLength)
 293         // send the starting bytes used to auto-detect the content-type
 294         conn.Write(start)
 295         // send rest of payload at light-speed
 296         io.Copy(conn, rest)
 297     }
 298 }
 299 
 300 func writePreludeHTTP(conn net.Conn, contentType string, contentLength int) {
 301     // respond right after the first empty line, which always follows the
 302     // request's headers
 303     fmt.Fprint(conn, "HTTP/1.1 200 OK\r\n")
 304     fmt.Fprintf(conn, "Content-Type: %s\r\n", contentType)
 305     if contentLength > 0 {
 306         fmt.Fprintf(conn, "Content-Length: %d\r\n", contentLength)
 307     }
 308 
 309     // prevent download-dialog or auto-download from the browser's part
 310     fmt.Fprintf(conn, "Content-Disposition: inline\r\n")
 311     // tell browser this is the last request
 312     fmt.Fprint(conn, "Connection: close\r\n")
 313     // payload starts right after an empty line
 314     fmt.Fprint(conn, "\r\n")
 315 }
 316 
 317 func writeBase64(conn net.Conn, mimeType string, start []byte, rest io.Reader) {
 318     // send the data-URI intro
 319     fmt.Fprintf(conn, `data:%s;base64,`, mimeType)
 320     enc := base64.NewEncoder(base64.StdEncoding, conn)
 321     // base64-encode the starting bytes used to auto-detect the input type
 322     enc.Write(start)
 323     // base64-encode the rest of the input
 324     io.Copy(enc, rest)
 325     enc.Close()
 326 }

     File: tu/strings.go
   1 package main
   2 
   3 import (
   4     "errors"
   5     "math"
   6     "os"
   7     "strconv"
   8     "strings"
   9 )
  10 
  11 func countDecimals(s string) int {
  12     if i := strings.LastIndexByte(s, '.'); i >= 0 {
  13         return len(s) - 1 - i
  14     }
  15     return 0
  16 }
  17 
  18 // parseInt makes using func parseInt64 more convenient
  19 func parseInt(s string) (int, error) {
  20     n, err := parseInt64(s)
  21     return int(n), err
  22 }
  23 
  24 // parseInt64 is basically a more flexible strconv.Atoi, since it supports
  25 // the handy floating-point notation for large numbers, and it ignores
  26 // underscores as well; when exposed to user-given args/input, it makes
  27 // using this app a much more pleasant experience overall
  28 func parseInt64(s string) (int64, error) {
  29     f, err := strconv.ParseFloat(s, 64)
  30     if err == nil && !math.IsNaN(f) && !math.IsInf(f, 0) {
  31         return int64(f), nil
  32     }
  33     return strconv.ParseInt(s, 10, 64)
  34 }
  35 
  36 // parsePortNumber handles the details specific to parsing valid port numbers;
  37 // this func refuses to accept 0 as a valid port number
  38 func parsePortNumber(s string) (int, error) {
  39     port, err := parseInt(s)
  40     if err != nil {
  41         return port, err
  42     }
  43 
  44     if port < 1 || port > 65_535 {
  45         msg := strconv.Itoa(port) + ` is an invalid port number`
  46         return port, errors.New(msg)
  47     }
  48     return port, nil
  49 }
  50 
  51 // splitSliceNonEmpty does what it says, ensuring no subslice in the result
  52 // is empty; empty slices return empty results
  53 func splitSliceNonEmpty(items []string, sep string) [][]string {
  54     cur := items
  55     var res [][]string
  56 
  57     for len(cur) > 0 {
  58         // skip all leading separators, also ensuring no empty subslices
  59         // sneak thru the splitting happending below
  60         for len(cur) > 0 && cur[0] == sep {
  61             cur = cur[1:]
  62         }
  63 
  64         i := findNext(cur, sep)
  65         // no more subslices, or the very last subslice follows
  66         if i < 0 {
  67             // don't forget trailing subslices, after the last separator
  68             if len(cur) > 0 {
  69                 res = append(res, cur)
  70             }
  71             return res
  72         }
  73 
  74         // ignore empty subslices
  75         if i == 0 {
  76             continue
  77         }
  78 
  79         res = append(res, cur[:i])
  80         cur = cur[i+1:]
  81     }
  82 
  83     return res
  84 }
  85 
  86 func startsWith(s string, what []byte) bool {
  87     if len(s) < len(what) {
  88         return false
  89     }
  90 
  91     for i := 0; i < len(s); i++ {
  92         if s[i] != what[i] {
  93             return false
  94         }
  95     }
  96     return true
  97 }
  98 
  99 // findNext finds a string in a string-slice, returning an invalid negative
 100 // index on failure
 101 func findNext(src []string, what string) int {
 102     for i, s := range src {
 103         if s == what {
 104             return i
 105         }
 106     }
 107     return -1
 108 }
 109 
 110 // unixSlashes ensures paths written to the output use unix-style slashes
 111 // on all systems
 112 func unixSlashes(path string) string {
 113     if os.PathSeparator == '\\' {
 114         return strings.ReplaceAll(path, `\`, `/`)
 115     }
 116     return path
 117 }
 118 
 119 // stringRing is a circular/ring-buffer for strings, where adding new items
 120 // eventually starts to overwrite earlier-added ones
 121 type stringRing struct {
 122     items []string
 123     next  int
 124 }
 125 
 126 // newStringRing is the constructor for type stringRing; the capacity given
 127 // to it can't be less than 1
 128 func newStringRing(cap int) stringRing {
 129     if cap < 1 {
 130         cap = 1
 131     }
 132     return stringRing{make([]string, 0, cap), 0}
 133 }
 134 
 135 // Len is the current item-count in the ring, which can never exceed the
 136 // max-capacity it was setup with
 137 func (sr stringRing) Len() int {
 138     return len(sr.items)
 139 }
 140 
 141 // Earliest gives you the earliest-added item still hanging in the ring
 142 func (sr stringRing) Earliest() string {
 143     if len(sr.items) > 0 {
 144         return sr.items[sr.next%len(sr.items)]
 145     }
 146     return ``
 147 }
 148 
 149 // Bring used to be called `Put`, but the new name rhymes
 150 func (sr *stringRing) Bring(s string) {
 151     if len(sr.items) < cap(sr.items) {
 152         sr.items = append(sr.items, s)
 153         sr.next++
 154         return
 155     }
 156 
 157     i := sr.next % cap(sr.items)
 158     sr.items[i] = s
 159     sr.next = i + 1
 160 }
 161 
 162 // Halves is a simple way to handle items in order, by treating the slice
 163 // as 2 separate sub-slices, each of which is ordered
 164 func (sr *stringRing) Halves() (first, second []string) {
 165     if sr.next > 0 {
 166         return sr.items[sr.next:], sr.items[:sr.next]
 167     }
 168     return sr.items, nil
 169 }

     File: tu/symbols.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "sort"
   8     "strings"
   9 )
  10 
const (
    // faces is a string of emoji, put together from several shorter lines
    // to keep the source readable; it's the value for the `faces` entry in
    // the names2symbols table
    faces = `` +
        `😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓` +
        `😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣😤😥😦😧` +
        `😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷🙁🙂🙃🙄` +
        `🧐👶🤓🤐🤑🤒🤔🤕🤗🤠🤡🤢🤣🤤🤥🤧🤨🤩🤪🤫` +
        `🤬🤭🤮🤯`
)
  19 
  20 // `crlf`:       "\r\n",
  21 // `lf`:         "\n",
  22 // `linefeed`:   "\n",
  23 // `tab`:        "\t",
  24 
  25 // names2symbols translates common names for common symbols
  26 var names2symbols = map[string]string{
  27     `adash`:      `-`,
  28     `amp`:        `&`,
  29     `ampersand`:  `&`,
  30     `apostrophe`: `’`,
  31     `ast`:        `*`,
  32     `asterisk`:   `*`,
  33     `backquote`:  "`",
  34     `backslash`:  `\`,
  35     `ball`:       `●`,
  36     `bang`:       `!`,
  37     `block`:      `█`,
  38     `bquote`:     "`",
  39     `bslash`:     `\`,
  40     `bullet`:     `•`,
  41     `caret`:      `^`,
  42     `colon`:      `:`,
  43     `comma`:      `,`,
  44     `commaspace`: `, `,
  45     `cquote`:     `”`,
  46     `cdot`:       `·`,
  47     `circle`:     `●`,
  48     `cloud`:      `☁️`,
  49     `copyright`:  `©`,
  50     `cross`:      `×`,
  51     `dash`:       `–`,
  52     `dollar`:     `$`,
  53     `dot`:        `.`,
  54     `dquote`:     `"`,
  55     `ellip`:      `…`,
  56     `ellipsis`:   `…`,
  57     `eq`:         `=`,
  58     `equal`:      `=`,
  59     `equals`:     `=`,
  60     `euro`:       `€`,
  61     `excl`:       `!`,
  62     `happy`:      `😀`,
  63     `hash`:       `#`,
  64     `heart`:      `❤️`,
  65     `hellip`:     `…`,
  66     `hole`:       `○`,
  67     `lightning`:  `🌩️`,
  68     `mdot`:       `·`,
  69     `minus`:      `-`,
  70     `moon`:       `🌕`,
  71     `oquote`:     `“`,
  72     `space`:      ` `,
  73     `percent`:    `%`,
  74     `pipe`:       `|`,
  75     `plus`:       `+`,
  76     `rain`:       `🌧️`,
  77     `semi`:       `;`,
  78     `semicolon`:  `;`,
  79     `sharp`:      `#`,
  80     `shit`:       `💩`,
  81     `slash`:      `/`,
  82     `slasher`:    `⧸`,
  83     `smile`:      `🙂`,
  84     `snow`:       `❄️`,
  85     `square`:     `■`,
  86     `squote`:     `'`,
  87     `star`:       `⭐`,
  88     `sun`:        `☀️`,
  89     `tilde`:      `~`,
  90     `underscore`: `_`,
  91     `vbar`:       `|`,
  92 
  93     `digits`:       `0123456789`,
  94     `faces`:        faces,
  95     `greek`:        `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω`,
  96     `infinity`:     `∞`,
  97     `latin`:        `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`,
  98     `letters`:      `ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`,
  99     `lower`:        `abcdefghijklmnopqrstuvwxyz`,
 100     `loweralpha`:   `α`,
 101     `lowerbeta`:    `β`,
 102     `lowerdelta`:   `δ`,
 103     `lowerepsilon`: `ε`,
 104     `lowergamma`:   `γ`,
 105     `lowerhex`:     `0123456789abcdef`,
 106     `lowerlambda`:  `λ`,
 107     `lowerlatin`:   `abcdefghijklmnopqrstuvwxyz`,
 108     `loweromega`:   `ω`,
 109     `lowerpi`:      `π`,
 110     `lowersigma`:   `σ`,
 111     `lowertau`:     `τ`,
 112     `lowertheta`:   `θ`,
 113     `lowergreek`:   `αβγδεζηθικλμνξοπρστυφχψω`,
 114     `math`:         `+-×÷²³±`,
 115     `other`:        `✓✗✔❌`,
 116     `plusminus`:    `±`,
 117     `product`:      `Π`,
 118     `punctuation`:  `!"#$%&'()*+,-./:;<=>?@[\]^_` + "`" + `{|}~`,
 119     `sum`:          `Σ`,
 120     `upper`:        `ABCDEFGHIJKLMNOPQRSTUVWXYZ`,
 121     `uppergreek`:   `ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ`,
 122     `upperhex`:     `0123456789ABCDEF`,
 123     `upperlatin`:   `ABCDEFGHIJKLMNOPQRSTUVWXYZ`,
 124 
 125     `eu`:  `🇪🇺`,
 126     `eur`: `🇪🇺`,
 127 
 128     `af`: `🇦🇫`,
 129     `ax`: `🇦🇽`,
 130     `al`: `🇦🇱`,
 131     `dz`: `🇩🇿`,
 132     `as`: `🇦🇸`,
 133     `ad`: `🇦🇩`,
 134     `ao`: `🇦🇴`,
 135     `ai`: `🇦🇮`,
 136     `aq`: `🇦🇶`,
 137     `ag`: `🇦🇬`,
 138     `ar`: `🇦🇷`,
 139     `am`: `🇦🇲`,
 140     `aw`: `🇦🇼`,
 141     `au`: `🇦🇺`,
 142     `at`: `🇦🇹`,
 143     `az`: `🇦🇿`,
 144     `bs`: `🇧🇸`,
 145     `bh`: `🇧🇭`,
 146     `bd`: `🇧🇩`,
 147     `bb`: `🇧🇧`,
 148     `by`: `🇧🇾`,
 149     `be`: `🇧🇪`,
 150     `bz`: `🇧🇿`,
 151     `bj`: `🇧🇯`,
 152     `bm`: `🇧🇲`,
 153     `bt`: `🇧🇹`,
 154     `bo`: `🇧🇴`,
 155     `bq`: `🇧🇶`,
 156     `ba`: `🇧🇦`,
 157     `bw`: `🇧🇼`,
 158     `bv`: `🇧🇻`,
 159     `br`: `🇧🇷`,
 160     `io`: `🇮🇴`,
 161     `bn`: `🇧🇳`,
 162     `bg`: `🇧🇬`,
 163     `bf`: `🇧🇫`,
 164     `bi`: `🇧🇮`,
 165     `cv`: `🇨🇻`,
 166     `kh`: `🇰🇭`,
 167     `cm`: `🇨🇲`,
 168     `ca`: `🇨🇦`,
 169     `ky`: `🇰🇾`,
 170     `cf`: `🇨🇫`,
 171     `td`: `🇹🇩`,
 172     `cl`: `🇨🇱`,
 173     `cn`: `🇨🇳`,
 174     `cx`: `🇨🇽`,
 175     `cc`: `🇨🇨`,
 176     `co`: `🇨🇴`,
 177     `km`: `🇰🇲`,
 178     `cd`: `🇨🇩`,
 179     `cg`: `🇨🇬`,
 180     `ck`: `🇨🇰`,
 181     `cr`: `🇨🇷`,
 182     `ci`: `🇨🇮`,
 183     `hr`: `🇭🇷`,
 184     `cu`: `🇨🇺`,
 185     `cw`: `🇨🇼`,
 186     `cy`: `🇨🇾`,
 187     `cz`: `🇨🇿`,
 188     `dk`: `🇩🇰`,
 189     `dj`: `🇩🇯`,
 190     `dm`: `🇩🇲`,
 191     `do`: `🇩🇴`,
 192     `ec`: `🇪🇨`,
 193     `eg`: `🇪🇬`,
 194     `sv`: `🇸🇻`,
 195     `gq`: `🇬🇶`,
 196     `er`: `🇪🇷`,
 197     `ee`: `🇪🇪`,
 198     `sz`: `🇸🇿`,
 199     `et`: `🇪🇹`,
 200     `fk`: `🇫🇰`,
 201     `fo`: `🇫🇴`,
 202     `fj`: `🇫🇯`,
 203     `fi`: `🇫🇮`,
 204     `fr`: `🇫🇷`,
 205     `gf`: `🇬🇫`,
 206     `pf`: `🇵🇫`,
 207     `tf`: `🇹🇫`,
 208     `ga`: `🇬🇦`,
 209     `gm`: `🇬🇲`,
 210     `ge`: `🇬🇪`,
 211     `de`: `🇩🇪`,
 212     `gh`: `🇬🇭`,
 213     `gi`: `🇬🇮`,
 214     `gr`: `🇬🇷`,
 215     `gl`: `🇬🇱`,
 216     `gd`: `🇬🇩`,
 217     `gp`: `🇬🇵`,
 218     `gu`: `🇬🇺`,
 219     `gt`: `🇬🇹`,
 220     `gg`: `🇬🇬`,
 221     `gn`: `🇬🇳`,
 222     `gw`: `🇬🇼`,
 223     `gy`: `🇬🇾`,
 224     `ht`: `🇭🇹`,
 225     `hm`: `🇭🇲`,
 226     `va`: `🇻🇦`,
 227     `hn`: `🇭🇳`,
 228     `hk`: `🇭🇰`,
 229     `hu`: `🇭🇺`,
 230     `is`: `🇮🇸`,
 231     `in`: `🇮🇳`,
 232     `id`: `🇮🇩`,
 233     `ir`: `🇮🇷`,
 234     `iq`: `🇮🇶`,
 235     `ie`: `🇮🇪`,
 236     `im`: `🇮🇲`,
 237     `il`: `🇮🇱`,
 238     `it`: `🇮🇹`,
 239     `jm`: `🇯🇲`,
 240     `jp`: `🇯🇵`,
 241     `je`: `🇯🇪`,
 242     `jo`: `🇯🇴`,
 243     `kz`: `🇰🇿`,
 244     `ke`: `🇰🇪`,
 245     `ki`: `🇰🇮`,
 246     `kp`: `🇰🇵`,
 247     `kr`: `🇰🇷`,
 248     `kw`: `🇰🇼`,
 249     `kg`: `🇰🇬`,
 250     `la`: `🇱🇦`,
 251     `lv`: `🇱🇻`,
 252     `lb`: `🇱🇧`,
 253     `ls`: `🇱🇸`,
 254     `lr`: `🇱🇷`,
 255     `ly`: `🇱🇾`,
 256     `li`: `🇱🇮`,
 257     `lt`: `🇱🇹`,
 258     `lu`: `🇱🇺`,
 259     `mo`: `🇲🇴`,
 260     `mk`: `🇲🇰`,
 261     `mg`: `🇲🇬`,
 262     `mw`: `🇲🇼`,
 263     `my`: `🇲🇾`,
 264     `mv`: `🇲🇻`,
 265     `ml`: `🇲🇱`,
 266     `mt`: `🇲🇹`,
 267     `mh`: `🇲🇭`,
 268     `mq`: `🇲🇶`,
 269     `mr`: `🇲🇷`,
 270     `mu`: `🇲🇺`,
 271     `yt`: `🇾🇹`,
 272     `mx`: `🇲🇽`,
 273     `fm`: `🇫🇲`,
 274     `md`: `🇲🇩`,
 275     `mc`: `🇲🇨`,
 276     `mn`: `🇲🇳`,
 277     `me`: `🇲🇪`,
 278     `ms`: `🇲🇸`,
 279     `ma`: `🇲🇦`,
 280     `mz`: `🇲🇿`,
 281     `mm`: `🇲🇲`,
 282     `na`: `🇳🇦`,
 283     `nr`: `🇳🇷`,
 284     `np`: `🇳🇵`,
 285     `nl`: `🇳🇱`,
 286     `nc`: `🇳🇨`,
 287     `nz`: `🇳🇿`,
 288     `ni`: `🇳🇮`,
 289     `ne`: `🇳🇪`,
 290     `ng`: `🇳🇬`,
 291     `nu`: `🇳🇺`,
 292     `nf`: `🇳🇫`,
 293     `mp`: `🇲🇵`,
 294     `no`: `🇳🇴`,
 295     `om`: `🇴🇲`,
 296     `pk`: `🇵🇰`,
 297     `pw`: `🇵🇼`,
 298     `ps`: `🇵🇸`,
 299     `pa`: `🇵🇦`,
 300     `pg`: `🇵🇬`,
 301     `py`: `🇵🇾`,
 302     `pe`: `🇵🇪`,
 303     `ph`: `🇵🇭`,
 304     `pn`: `🇵🇳`,
 305     `pl`: `🇵🇱`,
 306     `pt`: `🇵🇹`,
 307     `pr`: `🇵🇷`,
 308     `qa`: `🇶🇦`,
 309     `re`: `🇷🇪`,
 310     `ro`: `🇷🇴`,
 311     `ru`: `🇷🇺`,
 312     `rw`: `🇷🇼`,
 313     `bl`: `🇧🇱`,
 314     `sh`: `🇸🇭`,
 315     `kn`: `🇰🇳`,
 316     `lc`: `🇱🇨`,
 317     `mf`: `🇲🇫`,
 318     `pm`: `🇵🇲`,
 319     `vc`: `🇻🇨`,
 320     `ws`: `🇼🇸`,
 321     `sm`: `🇸🇲`,
 322     `st`: `🇸🇹`,
 323     `sa`: `🇸🇦`,
 324     `sn`: `🇸🇳`,
 325     `rs`: `🇷🇸`,
 326     `sc`: `🇸🇨`,
 327     `sl`: `🇸🇱`,
 328     `sg`: `🇸🇬`,
 329     `sx`: `🇸🇽`,
 330     `sk`: `🇸🇰`,
 331     `si`: `🇸🇮`,
 332     `sb`: `🇸🇧`,
 333     `so`: `🇸🇴`,
 334     `za`: `🇿🇦`,
 335     `gs`: `🇬🇸`,
 336     `ss`: `🇸🇸`,
 337     `es`: `🇪🇸`,
 338     `lk`: `🇱🇰`,
 339     `sd`: `🇸🇩`,
 340     `sr`: `🇸🇷`,
 341     `sj`: `🇸🇯`,
 342     `se`: `🇸🇪`,
 343     `ch`: `🇨🇭`,
 344     `sy`: `🇸🇾`,
 345     `tw`: `🇹🇼`,
 346     `tj`: `🇹🇯`,
 347     `tz`: `🇹🇿`,
 348     `th`: `🇹🇭`,
 349     `tl`: `🇹🇱`,
 350     `tg`: `🇹🇬`,
 351     `tk`: `🇹🇰`,
 352     `to`: `🇹🇴`,
 353     `tt`: `🇹🇹`,
 354     `tn`: `🇹🇳`,
 355     `tr`: `🇹🇷`,
 356     `tm`: `🇹🇲`,
 357     `tc`: `🇹🇨`,
 358     `tv`: `🇹🇻`,
 359     `ug`: `🇺🇬`,
 360     `ua`: `🇺🇦`,
 361     `ae`: `🇦🇪`,
 362     `gb`: `🇬🇧`,
 363     `um`: `🇺🇲`,
 364     `us`: `🇺🇸`,
 365     `uy`: `🇺🇾`,
 366     `uz`: `🇺🇿`,
 367     `vu`: `🇻🇺`,
 368     `ve`: `🇻🇪`,
 369     `vn`: `🇻🇳`,
 370     `vg`: `🇻🇬`,
 371     `vi`: `🇻🇮`,
 372     `wf`: `🇼🇫`,
 373     `eh`: `🇪🇭`,
 374     `ye`: `🇾🇪`,
 375     `zm`: `🇿🇲`,
 376     `zw`: `🇿🇼`,
 377 
 378     `afg`: `🇦🇫`,
 379     `ala`: `🇦🇽`,
 380     `alb`: `🇦🇱`,
 381     `dza`: `🇩🇿`,
 382     `asm`: `🇦🇸`,
 383     `and`: `🇦🇩`,
 384     `ago`: `🇦🇴`,
 385     `aia`: `🇦🇮`,
 386     `ata`: `🇦🇶`,
 387     `atg`: `🇦🇬`,
 388     `arg`: `🇦🇷`,
 389     `arm`: `🇦🇲`,
 390     `abw`: `🇦🇼`,
 391     `aus`: `🇦🇺`,
 392     `aut`: `🇦🇹`,
 393     `aze`: `🇦🇿`,
 394     `bhs`: `🇧🇸`,
 395     `bhr`: `🇧🇭`,
 396     `bgd`: `🇧🇩`,
 397     `brb`: `🇧🇧`,
 398     `blr`: `🇧🇾`,
 399     `bel`: `🇧🇪`,
 400     `blz`: `🇧🇿`,
 401     `ben`: `🇧🇯`,
 402     `bmu`: `🇧🇲`,
 403     `btn`: `🇧🇹`,
 404     `bol`: `🇧🇴`,
 405     `bes`: `🇧🇶`,
 406     `bih`: `🇧🇦`,
 407     `bwa`: `🇧🇼`,
 408     `bvt`: `🇧🇻`,
 409     `bra`: `🇧🇷`,
 410     `iot`: `🇮🇴`,
 411     `brn`: `🇧🇳`,
 412     `bgr`: `🇧🇬`,
 413     `bfa`: `🇧🇫`,
 414     `bdi`: `🇧🇮`,
 415     `cpv`: `🇨🇻`,
 416     `khm`: `🇰🇭`,
 417     `cmr`: `🇨🇲`,
 418     `can`: `🇨🇦`,
 419     `cym`: `🇰🇾`,
 420     `caf`: `🇨🇫`,
 421     `tcd`: `🇹🇩`,
 422     `chl`: `🇨🇱`,
 423     `chn`: `🇨🇳`,
 424     `cxr`: `🇨🇽`,
 425     `cck`: `🇨🇨`,
 426     `col`: `🇨🇴`,
 427     `com`: `🇰🇲`,
 428     `cod`: `🇨🇩`,
 429     `cog`: `🇨🇬`,
 430     `cok`: `🇨🇰`,
 431     `cri`: `🇨🇷`,
 432     `civ`: `🇨🇮`,
 433     `hrv`: `🇭🇷`,
 434     `cub`: `🇨🇺`,
 435     `cuw`: `🇨🇼`,
 436     `cyp`: `🇨🇾`,
 437     `cze`: `🇨🇿`,
 438     `dnk`: `🇩🇰`,
 439     `dji`: `🇩🇯`,
 440     `dma`: `🇩🇲`,
 441     `dom`: `🇩🇴`,
 442     `ecu`: `🇪🇨`,
 443     `egy`: `🇪🇬`,
 444     `slv`: `🇸🇻`,
 445     `gnq`: `🇬🇶`,
 446     `eri`: `🇪🇷`,
 447     `est`: `🇪🇪`,
 448     `swz`: `🇸🇿`,
 449     `eth`: `🇪🇹`,
 450     `flk`: `🇫🇰`,
 451     `fro`: `🇫🇴`,
 452     `fji`: `🇫🇯`,
 453     `fin`: `🇫🇮`,
 454     `fra`: `🇫🇷`,
 455     `guf`: `🇬🇫`,
 456     `pyf`: `🇵🇫`,
 457     `atf`: `🇹🇫`,
 458     `gab`: `🇬🇦`,
 459     `gmb`: `🇬🇲`,
 460     `geo`: `🇬🇪`,
 461     `deu`: `🇩🇪`,
 462     `gha`: `🇬🇭`,
 463     `gib`: `🇬🇮`,
 464     `grc`: `🇬🇷`,
 465     `grl`: `🇬🇱`,
 466     `grd`: `🇬🇩`,
 467     `glp`: `🇬🇵`,
 468     `gum`: `🇬🇺`,
 469     `gtm`: `🇬🇹`,
 470     `ggy`: `🇬🇬`,
 471     `gin`: `🇬🇳`,
 472     `gnb`: `🇬🇼`,
 473     `guy`: `🇬🇾`,
 474     `hti`: `🇭🇹`,
 475     `hmd`: `🇭🇲`,
 476     `vat`: `🇻🇦`,
 477     `hnd`: `🇭🇳`,
 478     `hkg`: `🇭🇰`,
 479     `hun`: `🇭🇺`,
 480     `isl`: `🇮🇸`,
 481     `ind`: `🇮🇳`,
 482     `idn`: `🇮🇩`,
 483     `irn`: `🇮🇷`,
 484     `irq`: `🇮🇶`,
 485     `irl`: `🇮🇪`,
 486     `imn`: `🇮🇲`,
 487     `isr`: `🇮🇱`,
 488     `ita`: `🇮🇹`,
 489     `jam`: `🇯🇲`,
 490     `jpn`: `🇯🇵`,
 491     `jey`: `🇯🇪`,
 492     `jor`: `🇯🇴`,
 493     `kaz`: `🇰🇿`,
 494     `ken`: `🇰🇪`,
 495     `kir`: `🇰🇮`,
 496     `prk`: `🇰🇵`,
 497     `kor`: `🇰🇷`,
 498     `kwt`: `🇰🇼`,
 499     `kgz`: `🇰🇬`,
 500     `lao`: `🇱🇦`,
 501     `lva`: `🇱🇻`,
 502     `lbn`: `🇱🇧`,
 503     `lso`: `🇱🇸`,
 504     `lbr`: `🇱🇷`,
 505     `lby`: `🇱🇾`,
 506     `lie`: `🇱🇮`,
 507     `ltu`: `🇱🇹`,
 508     `lux`: `🇱🇺`,
 509     `mac`: `🇲🇴`,
 510     `mkd`: `🇲🇰`,
 511     `mdg`: `🇲🇬`,
 512     `mwi`: `🇲🇼`,
 513     `mys`: `🇲🇾`,
 514     `mdv`: `🇲🇻`,
 515     `mli`: `🇲🇱`,
 516     `mlt`: `🇲🇹`,
 517     `mhl`: `🇲🇭`,
 518     `mtq`: `🇲🇶`,
 519     `mrt`: `🇲🇷`,
 520     `mus`: `🇲🇺`,
 521     `myt`: `🇾🇹`,
 522     `mex`: `🇲🇽`,
 523     `fsm`: `🇫🇲`,
 524     `mda`: `🇲🇩`,
 525     `mco`: `🇲🇨`,
 526     `mng`: `🇲🇳`,
 527     `mne`: `🇲🇪`,
 528     `msr`: `🇲🇸`,
 529     `mar`: `🇲🇦`,
 530     `moz`: `🇲🇿`,
 531     `mmr`: `🇲🇲`,
 532     `nam`: `🇳🇦`,
 533     `nru`: `🇳🇷`,
 534     `npl`: `🇳🇵`,
 535     `nld`: `🇳🇱`,
 536     `ncl`: `🇳🇨`,
 537     `nzl`: `🇳🇿`,
 538     `nic`: `🇳🇮`,
 539     `ner`: `🇳🇪`,
 540     `nga`: `🇳🇬`,
 541     `niu`: `🇳🇺`,
 542     `nfk`: `🇳🇫`,
 543     `mnp`: `🇲🇵`,
 544     `nor`: `🇳🇴`,
 545     `omn`: `🇴🇲`,
 546     `pak`: `🇵🇰`,
 547     `plw`: `🇵🇼`,
 548     `pse`: `🇵🇸`,
 549     `pan`: `🇵🇦`,
 550     `png`: `🇵🇬`,
 551     `pry`: `🇵🇾`,
 552     `per`: `🇵🇪`,
 553     `phl`: `🇵🇭`,
 554     `pcn`: `🇵🇳`,
 555     `pol`: `🇵🇱`,
 556     `prt`: `🇵🇹`,
 557     `pri`: `🇵🇷`,
 558     `qat`: `🇶🇦`,
 559     `reu`: `🇷🇪`,
 560     `rou`: `🇷🇴`,
 561     `rus`: `🇷🇺`,
 562     `rwa`: `🇷🇼`,
 563     `blm`: `🇧🇱`,
 564     `shn`: `🇸🇭`,
 565     `kna`: `🇰🇳`,
 566     `lca`: `🇱🇨`,
 567     `maf`: `🇲🇫`,
 568     `spm`: `🇵🇲`,
 569     `vct`: `🇻🇨`,
 570     `wsm`: `🇼🇸`,
 571     `smr`: `🇸🇲`,
 572     `stp`: `🇸🇹`,
 573     `sau`: `🇸🇦`,
 574     `sen`: `🇸🇳`,
 575     `srb`: `🇷🇸`,
 576     `syc`: `🇸🇨`,
 577     `sle`: `🇸🇱`,
 578     `sgp`: `🇸🇬`,
 579     `sxm`: `🇸🇽`,
 580     `svk`: `🇸🇰`,
 581     `svn`: `🇸🇮`,
 582     `slb`: `🇸🇧`,
 583     `som`: `🇸🇴`,
 584     `zaf`: `🇿🇦`,
 585     `sgs`: `🇬🇸`,
 586     `ssd`: `🇸🇸`,
 587     `esp`: `🇪🇸`,
 588     `lka`: `🇱🇰`,
 589     `sdn`: `🇸🇩`,
 590     `sur`: `🇸🇷`,
 591     `sjm`: `🇸🇯`,
 592     `swe`: `🇸🇪`,
 593     `che`: `🇨🇭`,
 594     `syr`: `🇸🇾`,
 595     `twn`: `🇹🇼`,
 596     `tjk`: `🇹🇯`,
 597     `tza`: `🇹🇿`,
 598     `tha`: `🇹🇭`,
 599     `tls`: `🇹🇱`,
 600     `tgo`: `🇹🇬`,
 601     `tkl`: `🇹🇰`,
 602     `ton`: `🇹🇴`,
 603     `tto`: `🇹🇹`,
 604     `tun`: `🇹🇳`,
 605     `tur`: `🇹🇷`,
 606     `tkm`: `🇹🇲`,
 607     `tca`: `🇹🇨`,
 608     `tuv`: `🇹🇻`,
 609     `uga`: `🇺🇬`,
 610     `ukr`: `🇺🇦`,
 611     `are`: `🇦🇪`,
 612     `gbr`: `🇬🇧`,
 613     `uae`: `🇦🇪`,
 614     `umi`: `🇺🇲`,
 615     `usa`: `🇺🇸`,
 616     `ury`: `🇺🇾`,
 617     `uzb`: `🇺🇿`,
 618     `vut`: `🇻🇺`,
 619     `ven`: `🇻🇪`,
 620     `vnm`: `🇻🇳`,
 621     `vgb`: `🇻🇬`,
 622     `vir`: `🇻🇮`,
 623     `wlf`: `🇼🇫`,
 624     `esh`: `🇪🇭`,
 625     `yem`: `🇾🇪`,
 626     `zmb`: `🇿🇲`,
 627     `zwe`: `🇿🇼`,
 628 }
 629 
 630 // symbolAliases resolves aliases into keys used in table names2symbols
 631 var symbolAliases = map[string]string{
 632     // greek symbols
 633     `alpha`:   `loweralpha`,
 634     `beta`:    `lowerbeta`,
 635     `delta`:   `lowerdelta`,
 636     `epsilon`: `lowerepsilon`,
 637     `gamma`:   `lowergamma`,
 638     `lambda`:  `lowerlambda`,
 639     `omega`:   `loweromega`,
 640     `pi`:      `lowerpi`,
 641     `sigma`:   `lowersigma`,
 642     `tau`:     `lowertau`,
 643     `theta`:   `lowertheta`,
 644 
 645     // math symbols
 646     `inf`:       `infinity`,
 647     `infty`:     `infinity`,
 648     `summation`: `sum`,
 649 
 650     // punctuation symbols
 651     `apos`:  `apostrophe`,
 652     `bquo`:  `bquote`,
 653     `dquo`:  `dquote`,
 654     `squo`:  `squote`,
 655     `punct`: `punctuation`,
 656 
 657     // country flags
 658     `afghanistan`:        `af`,
 659     `america`:            `us`,
 660     `algeria`:            `dz`,
 661     `angola`:             `ao`,
 662     `arabia`:             `sa`,
 663     `arabemirates`:       `ae`,
 664     `argentina`:          `ar`,
 665     `australia`:          `au`,
 666     `austria`:            `at`,
 667     `bangladesh`:         `bd`,
 668     `belgium`:            `be`,
 669     `brazil`:             `br`,
 670     `britain`:            `gb`,
 671     `canada`:             `ca`,
 672     `chile`:              `cl`,
 673     `china`:              `cn`,
 674     `colombia`:           `co`,
 675     `czechia`:            `cz`,
 676     `czechrepublic`:      `cz`,
 677     `denmark`:            `dk`,
 678     `dominicanrepublic`:  `do`,
 679     `drc`:                `cd`,
 680     `drcongo`:            `cd`,
 681     `ecuador`:            `ec`,
 682     `egypt`:              `eg`,
 683     `emirates`:           `ae`,
 684     `england`:            `gb`,
 685     `ethiopia`:           `et`,
 686     `finland`:            `fi`,
 687     `france`:             `fr`,
 688     `germany`:            `de`,
 689     `ghana`:              `gh`,
 690     `greatbritain`:       `gb`,
 691     `greece`:             `gr`,
 692     `holland`:            `nl`,
 693     `hungary`:            `hu`,
 694     `india`:              `in`,
 695     `indonesia`:          `id`,
 696     `iran`:               `ir`,
 697     `iraq`:               `iq`,
 698     `ireland`:            `ie`,
 699     `israel`:             `il`,
 700     `italy`:              `it`,
 701     `japan`:              `jp`,
 702     `kazakhstan`:         `kz`,
 703     `kenya`:              `ke`,
 704     `korea`:              `kr`,
 705     `kuwait`:             `kw`,
 706     `madagascar`:         `mg`,
 707     `malaysia`:           `my`,
 708     `mexico`:             `mx`,
 709     `morocco`:            `ma`,
 710     `mozambique`:         `mz`,
 711     `myanmar`:            `mm`,
 712     `nepal`:              `np`,
 713     `netherlands`:        `nl`,
 714     `newzealand`:         `nz`,
 715     `nigeria`:            `ng`,
 716     `northkorea`:         `kp`,
 717     `norway`:             `no`,
 718     `pakistan`:           `pk`,
 719     `peru`:               `pe`,
 720     `philippines`:        `ph`,
 721     `poland`:             `pl`,
 722     `portugal`:           `pt`,
 723     `qatar`:              `qa`,
 724     `rok`:                `kr`,
 725     `romania`:            `ro`,
 726     `russia`:             `ru`,
 727     `saudiarabia`:        `sa`,
 728     `singapore`:          `sg`,
 729     `somalia`:            `so`,
 730     `southafrica`:        `za`,
 731     `southkorea`:         `kr`,
 732     `spain`:              `es`,
 733     `srilanka`:           `lk`,
 734     `sudan`:              `sd`,
 735     `sweden`:             `se`,
 736     `switzerland`:        `ch`,
 737     `taiwan`:             `tw`,
 738     `tanzania`:           `tz`,
 739     `thailand`:           `th`,
 740     `turkey`:             `tr`,
 741     `uganda`:             `ug`,
 742     `ukraine`:            `ua`,
 743     `unitedarabemirates`: `ae`,
 744     `unitedemirates`:     `ae`,
 745     `unitedkingdom`:      `gb`,
 746     `unitedstates`:       `us`,
 747     `uzbekistan`:         `uz`,
 748     `yemen`:              `ye`,
 749     `venezuela`:          `ve`,
 750     `vietnam`:            `vn`,
 751 }
 752 
 753 func showSymbols(w *bufio.Writer, r io.Reader, names []string) error {
 754     if len(names) == 0 {
 755         return showAllSymbols(w)
 756     }
 757 
 758     for _, name := range names {
 759         // normalize symbol names
 760         name = strings.TrimSpace(name)
 761         name = strings.ToLower(name)
 762         name = strings.ReplaceAll(name, ` `, ``)
 763         name = strings.ReplaceAll(name, `-`, ``)
 764         name = strings.ReplaceAll(name, `_`, ``)
 765 
 766         if len(name) == 0 {
 767             return errors.New(`no symbol name given`)
 768         }
 769 
 770         // handle regular symbol-name lookups
 771         s := name
 772         if alias, ok := symbolAliases[name]; ok {
 773             s = alias
 774         }
 775         sym, ok := names2symbols[s]
 776         if !ok {
 777             return errors.New(`no symbol named ` + name + ` found`)
 778         }
 779 
 780         w.WriteString(sym)
 781         if err := endLine(w); err != nil {
 782             return err
 783         }
 784     }
 785 
 786     return nil
 787 }
 788 
 789 func showAllSymbols(w *bufio.Writer) error {
 790     var keys []string
 791     got := make(map[string]struct{}, len(names2symbols)+len(symbolAliases))
 792 
 793     for k := range names2symbols {
 794         got[k] = struct{}{}
 795         keys = append(keys, k)
 796     }
 797 
 798     for k := range symbolAliases {
 799         if _, ok := got[k]; ok {
 800             continue
 801         }
 802         got[k] = struct{}{}
 803         keys = append(keys, k)
 804     }
 805 
 806     sort.Strings(keys)
 807 
 808     for _, s := range keys {
 809         k := s
 810         if alias, ok := symbolAliases[k]; ok {
 811             k = alias
 812         }
 813 
 814         w.WriteString(s)
 815         w.WriteByte('\t')
 816         w.WriteString(names2symbols[k])
 817         if err := endLine(w); err != nil {
 818             return err
 819         }
 820     }
 821 
 822     return nil
 823 }

     File: tu/tables.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "io"
   6 )
   7 
   8 func beginCSV(w *bufio.Writer, r io.Reader, args []string) error {
   9     rw := newWriterCSV(w)
  10     err := rw.Write(args)
  11     rw.Flush()
  12 
  13     if err := adaptWriteError(err); err != nil {
  14         return err
  15     }
  16 
  17     return loopLines(r, func(line []byte) error {
  18         w.Write(line)
  19         return endLine(w)
  20     })
  21 }
  22 
  23 func beginTSV(w *bufio.Writer, r io.Reader, args []string) error {
  24     for i, s := range args {
  25         if i > 0 {
  26             w.WriteByte('\t')
  27         }
  28         w.WriteString(s)
  29     }
  30 
  31     if err := endLine(w); err != nil {
  32         return err
  33     }
  34 
  35     return loopLines(r, func(line []byte) error {
  36         w.Write(line)
  37         return endLine(w)
  38     })
  39 }
  40 
  41 func csv2lines(w *bufio.Writer, r io.Reader) error {
  42     return loopCSV(r, func(row []string) error {
  43         return writeLines(w, row)
  44     })
  45 }
  46 
  47 func csv2tsv(w *bufio.Writer, r io.Reader) error {
  48     return loopCSV(r, func(row []string) error {
  49         for i, v := range row {
  50             if i > 0 {
  51                 w.WriteByte('\t')
  52             }
  53             w.WriteString(v)
  54         }
  55         return endLine(w)
  56     })
  57 }
  58 
  59 func endCSV(w *bufio.Writer, r io.Reader, args []string) error {
  60     err := loopLines(r, func(line []byte) error {
  61         w.Write(line)
  62         return endLine(w)
  63     })
  64 
  65     if err != nil {
  66         return err
  67     }
  68 
  69     rw := newWriterCSV(w)
  70     defer rw.Flush()
  71     return adaptWriteError(rw.Write(args))
  72 }
  73 
  74 func endTSV(w *bufio.Writer, r io.Reader, args []string) error {
  75     err := loopLines(r, func(line []byte) error {
  76         w.Write(line)
  77         return endLine(w)
  78     })
  79 
  80     if err != nil {
  81         return err
  82     }
  83 
  84     for i, s := range args {
  85         if i > 0 {
  86             w.WriteByte('\t')
  87         }
  88         w.WriteString(s)
  89     }
  90     return endLine(w)
  91 }
  92 
  93 func ssv2lines(w *bufio.Writer, r io.Reader) error {
  94     return loopLines(r, func(line []byte) error {
  95         var err error
  96         loopFields(line, func(i int, s []byte) (keepGoing bool) {
  97             w.Write(s)
  98             err = endLine(w)
  99             return err == nil
 100         })
 101         return err
 102     })
 103 }
 104 
 105 func ssv2tsv(w *bufio.Writer, r io.Reader) error {
 106     return loopLines(r, func(line []byte) error {
 107         // avoid empty output lines; keeps the original string, in case
 108         // it's later being treated as TSV, padding and all
 109         if len(trimSpaces(line)) == 0 {
 110             return nil
 111         }
 112 
 113         loopFields(line, func(i int, s []byte) (keepGoing bool) {
 114             if i > 0 {
 115                 w.WriteByte('\t')
 116             }
 117             w.Write(s)
 118             return true
 119         })
 120 
 121         return endLine(w)
 122     })
 123 }
 124 
 125 func tsv2lines(w *bufio.Writer, r io.Reader) error {
 126     return loopLines(r, func(line []byte) error {
 127         var err error
 128         loopTSV(line, func(i int, s []byte) (keepGoing bool) {
 129             w.Write(s)
 130             err = endLine(w)
 131             return err == nil
 132         })
 133         return err
 134     })
 135 }

     File: tu/tools.txt
   1 after [what]
   2     ignore starts of lines until the string/marker given; no matches in
   3     a line results in an empty line
   4 
   5 after-last [what]
   6     ignore starts of lines until the last appearance of the string/marker
   7     given; no matches in a line results in an empty line
   8 
   9 aliases
  10     show all tool names and their aliases as lines of tab-separated items
  11 
  12 base-64
  13     turn input bytes into their base-64 counterparts
  14 
  15 before [what]
  16     ignore parts of lines starting from the string/marker given; lines
  17     with no matches are kept in full
  18 
  19 before-last [what]
  20     ignore parts of lines starting from the last appearance of the
  21     string/marker given; lines with no matches are kept in full
  22 
  23 begin [lines...]
  24     start output with the lines given, followed by all input lines
  25 
  26 begin-csv [column names...]
  27     start output with a CSV (comma-separated values) line, followed by
  28     all lines from the input; precede input with a CSV line at the start
  29 
  30 begin-tsv [column names...]
  31     start output with a TSV (tab-separated values) line, followed by
  32     all lines from the input; precede input with a TSV line at the start
  33 
  34 big-files [min] [folders...]
  35     find all files recursively from the folders given, which have at
  36     least the number of bytes given
  37 
  38 blow [tab-stop...]
  39     expand tabs using up to the number of spaces given (the `tab-stop`);
  40     default tab-stop is 4, when not given explicitly; when the tab-stop
  41     is less than 1, behaves like tool `drop-tabs`
  42 
  43 book [height]
  44     layout input lines side-by-side on 2 columns, like in a book; a
  45     useful shell function around this tool is `like a book` (lab):
  46     lab() { tu book "$(($(tput lines) - 1))" "$@" | less -KiCRS; }
  47 
  48 breathe [every...]
  49     put an extra empty line every few lines (by default every 5); periods
  50     less than 1 disable adding extra empty lines
  51 
  52 bytes [filepaths...]
  53     concatenate/emit all bytes from all named inputs (files) given: this is
  54     one of the few tools which can open files; single dashes mean fully-read
  55     standard input; multiple single dashes are allowed, stdin being read only
  56     once
  57 
  58 chop-lf
  59     ignore the last byte, if it's a line-feed
  60 
  61 common [filepath] [filepath]
  62     find all lines 2 inputs have in common (their set-intersection); this
  63     is one of the few tools which can open files
  64 
  65 compose [tools...]
  66     compose multiple tools pipe-like, allowing their executions to overlap,
  67     taking advantage of multiple cores, when available; the double-dash `--`
  68     separates each tool/arguments combo used for the internal pipe; the same
  69     double-dash is also available as an alias for this tool, for convenience
  70 
  71 count-bytes [files/folders...]
  72     count all bytes, including tallies of several bytes of common interest,
  73     such as spaces, tabs, nulls, and so on; the result is TSV (tab-separated
  74     values) lines, starting with a header line
  75 
  76 crlf
  77     make all lines end with a CRLF byte-pair
  78 
  79 csv
  80     turn CSV (comma-separated values) lines into single-item lines
  81 
  82 csv2tsv
  83     turn CSV (comma-separated values) lines into TSV (tab-separated) ones
  84 
  85 data-uri [files...]
  86     encode each response from each path/URI given as a data-URI line,
  87     which is like base-64, but starts with a MIME-type declaration
  88 
  89 debase-64
  90     decode base64-encoded text into its corresponding bytes
  91 
  92 dedup
  93     ensure any input-line appears in the output only once
  94 
  95 drop [strings...]
  96     remove all strings given from each line, in the order given
  97 
  98 drop-end [strings...]
  99     remove all endings given from each line, if present at the end when
 100     checked, and in the order given
 101 
 102 drop-start [strings...]
 103     remove all starts given from each line, if present at the start when
 104     checked, and in the order given
 105 
 106 drop-tabs
 107     remove/ignore all tabs from each line; tool `blow` acts the same way,
 108     when given a 0 or negative `tab-stop` value
 109 
 110 each [tool] [arguments...]
 111     run the tool for each input line, using the arguments given, followed
 112     by the current line as an extra/final argument
 113 
 114 empty-files [folders...]
 115     find all empty files recursively from the folders given
 116 
 117 empty-folders [folders...]
 118     find all empty folders recursively from the folders given
 119 
 120 end [lines...]
 121     output all input lines, followed by the lines given
 122 
 123 end-csv [column names...]
 124     output all input lines, followed by a CSV (comma-separated values)
 125     line
 126 
 127 end-tsv [column names...]
 128     output all input lines, followed by a TSV (tab-separated values) line
 129 
 130 examples
 131     show examples explaining how to use this app's tools on the cmd-line
 132 
 133 files [folders...]
 134     find all files recursively from the folders given
 135 
 136 first [max lines...]
 137     limit output to the number of lines given, if input exceeds it; the
 138     default is 1 line, when no number is given
 139 
 140 folders [folders...]
 141     find all folders recursively from the folders given
 142 
 143 glue [separator...]
 144     put all input lines into a single line, putting the separator given
 145     between items; when not given, the default is to use no separator
 146 
 147 gzip
 148     gzip-encode/compress input bytes
 149 
 150 help
 151     show this app's help message
 152 
 153 hex
 154     encode input bytes into ASCII-hexadecimal text
 155 
 156 id3-pic
 157     isolate the thumbnail bytes, if available, from the ID3 part of a media
 158     file/stream, usually with MP3-encoded sound in it; this tool supports
 159     both PIC and APIC data-section types; also available as `mp3-pic`
 160 
 161 join [separator...]
 162     join input lines into a single line, putting the separator between
 163     adjacent items; the separator defaults to a tab, when not given
 164 
 165 json-0
 166     convert/fix JSON/pseudo-JSON input into minimal JSON output; valid JSON
 167     input is simply minimized; output is always a single line, which always
 168     ends with a line-feed
 169 
 170 jsonl
 171     convert lines, each with JSON/pseudo-JSON data, into a valid (minimal)
 172     JSON array
 173 
 174 junk [byte count...]
 175     emit pseudo-random bytes; if no byte-count is given, the default is to
 176     emit 1024 pseudo-random bytes
 177 
 178 last [max lines...]
 179     limit output to the last few lines, if input has more lines than the
 180     number given; the default is 1 line, when no number is given
 181 
 182 lines [filepaths...]
 183     ensure all lines end with a single LF byte, including the last one,
 184     ignoring CRLF byte-pairs from the original input; this is one of the
 185     few tools which can open files; single dashes mean fully-read standard
 186     input; multiple single dashes are allowed, stdin being read only once
 187 
 188 line-up [column count...]
 189     line-up input lines into TSV (tab-separated values) lines with up to
 190     the number of items given; when the item-count is less than 1, line-up
 191     all input lines into a single TSV line
 192 
 193 links
 194     emit all hyperlinks found in the input, one per output line
 195 
 196 lower
 197     lower-case all symbols
 198 
 199 match-para [regexp...]
 200     `match paragraphs` keeps only the paragraphs matching any of the
 201     regexes given on any of their lines; all regex matches are done
 202     case-insensitively, meaning letter-case differences are ignored
 203 
 204 md-5
 205     turn input bytes into their MD-5 hash, as a hexadecimal-ASCII line
 206 
 207 missing [filepath] [filepath]
 208     find all lines which are in the 2nd input, but not in the 1st input;
 209     this is one of the few tools which can open files
 210 
 211 mumble [tool] [arguments...]
 212     run the tool with the arguments given, after starting a line with
 213     the space-separated tool/argument list, followed by a tab
 214 
 215 n [start...]
 216     number lines starting from the number given, or 1 by default; line
 217     numbers are separated from the rest of their output line by a tab
 218 
 219 no-pun
 220     drop/ignore all common punctuation symbols
 221 
 222 now
 223     show the current date and time using the YYYY-MM-DD HH:MM:SS format
 224 
 225 null
 226     read nothing, write nothing
 227 
 228 numbers
 229     emit only numbers detected from the input, one item per output line
 230 
 231 open [names...]
 232     open files/folders/URIs/ports using the system's designated app for
 233     each; port numbers open in the main web-browser pointing to localhost
 234 
 235 plain
 236     ignore all ANSI-style sequences, leaving all other bytes as given
 237 
 238 primes [count...]
 239     show the first n prime numbers, one per line; when not given a count,
 240     the default is to show the first 1 million primes
 241 
 242 reflow [max runes]
 243     wrap/fold lines, trying to keep to the rune-count given, the exception
 244     being single `words` which are longer than that on their own, and thus
 245     can't be wrapped `losslessly`
 246 
 247 runes
 248     emit each UTF-8 code on its own line, except carriage-returns and
 249     line-feeds, which are ignored; empty input-lines are ignored, so no
 250     empty output-lines should result; all multi-code symbols, such as
 251     country-flags, will get split across lines, resulting in nonsense
 252 
 253 sbs [column count...]
 254     `Side By Side` tries to lay input lines into columns; if not given
 255     a column-count it tries to guess how many can fit an 80-symbol width;
 256     the same automatic behavior happens when given a non-positive count
 257 
 258 sha-1
 259     turn input bytes into their SHA-1 hash, as a hexadecimal-ASCII line
 260 
 261 sha-256
 262     turn input bytes into their SHA-256 hash, as a hexadecimal-ASCII line
 263 
 264 sha-512
 265     turn input bytes into their SHA-512 hash, as a hexadecimal-ASCII line
 266 
 267 show-it [files/folders/URIs/ports...]
 268     when given names, works mostly the same as the `open` tool; when not
 269     given any name, it auto-detects data from the main input, and pops a
 270     new web-browser tab with the content ready to view and/or play
 271 
 272 since [what]
 273     ignore starts of lines before the string/marker given, so the marker
 274     itself ends up in the result; no matches in a line results in an empty
 275     line
 276 
 277 since-last [what]
 278     ignore starts of lines before the last appearance of the string/marker
 279     given, so the marker itself ends up in the result; no matches in a line
 280     results in an empty line
 281 
 282 skip [line count]
 283     ignore the first n lines, possibly ignoring all input, when it has
 284     fewer lines than that
 285 
 286 skip-empty
 287     skip/ignore empty lines, keeping all others
 288 
 289 skip-last [line count...]
 290     ignore the last n lines, possibly ignoring all input, when it has
 291     fewer lines than that; the default is 1, when no number is given
 292 
 293 small-files [max] [folders...]
 294     find all files recursively from the folders given, which have fewer
 295     than the number of bytes given
 296 
 297 squeeze
 298     aggressively ignore unneeded spaces, ignoring leading and/or trailing
 299     spaces in lines, also `squeezing` runs of multiple spaces into single
 300     spaces; spaces around tabs are ignored as well
 301 
 302 ssv
 303     turn SSV (space-separated values) lines into single-item lines
 304 
 305 ssv2tsv
 306     turn SSV (space-separated values) lines into TSV (tab-separated) ones
 307 
 308 stomp
 309     turn runs of consecutive empty lines into single empty lines, thus
 310     tightening paragraphs; also ignore leading and trailing empty lines,
 311     except for a final line-feed output for any non-empty input; to also
 312     trim lines in paragraphs, use tool `trim-par` instead
 313 
 314 strings
 315     find all ASCII strings among the input bytes, showing one per line
 316 
 317 symbols [symbol name...]
 318     lookup commonly-used symbols by their name(s)/aliases; show all symbols
 319     available, with all their aliases, when not given any lookup-names
 320 
 321 tally
 322     reverse-sort unique input-lines by their tally counts; each output line
 323     is the tally-count, a tab, and the tallied line; output has no header
 324     line with the column names, as usual for TSV (tab-separated values)
 325     tables; command `begin-tsv` can prepend such a starting line
 326 
 327 today
 328     show the current date
 329 
 330 top-files [folders...]
 331     find all top-level files from the folders given
 332 
 333 top-folders [folders...]
 334     find all top-level folders from the folders given
 335 
 336 trim
 337     ignore leading and/or trailing spaces from lines
 338 
 339 trim-trail
 340     ignore trailing spaces from lines
 341 
 342 truncate [max runes]
 343     limit lines up to the number of runes given; negative values result
 344     in empty lines; some symbols which take more than 1 `rune`, such as
 345     country flags, may get truncated into nonsense, and always count as
 346     multiple runes
 347 
 348 tsv
 349     turn TSV (tab-separated values) lines into single-item lines
 350 
 351 un-bzip
 352     bzip2-decode/uncompress input bytes
 353 
 354 un-gzip
 355     gzip-decode/uncompress input bytes
 356 
 357 un-hex
 358     decode input bytes from ASCII-hexadecimal text
 359 
 360 until [what]
 361     ignore parts of lines beyond the string/marker given; no matches in
 362     a line keeps the whole line as is
 363 
 364 until-last [what]
 365     ignore parts of lines beyond the last appearance of the string/marker
 366     given; no matches in a line keeps the whole line as is
 367 
 368 utf-8
 369     turn both kinds of UTF-16 data into UTF-8, keeping UTF-8 input as
 370     given, except ignoring a leading UTF-8 BOM, if present
 371 
 372 uri-encode
 373     URI-encode each input line, percent-escaping symbols when needed
 374 
 375 vulgarize
 376     turn Latin-1-encoded bytes into UTF-8 runes; the name is taken from
 377     the word `vulgarization`, which refers to the historical evolution
 378     from Latin into various modern languages over the centuries
 379 
 380 with [file] [tool] [arguments...]
 381     run the tool with the arguments given, using data from the named-input
 382     given before the tool-name
 383 
 384 ymd
 385     show the current date using the YYYY-MM-DD format

     File: tu/utf8.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "io"
   6     "unicode/utf16"
   7 )
   8 
   9 // toUTF8 turns UTF-16 bytes (both kinds) and BOMed UTF-8 bytes into
  10 // proper UTF-8 bytes: this is one of the few text-related tools which
  11 // keeps CRLF sequences verbatim
  12 func toUTF8(w *bufio.Writer, r io.Reader) error {
  13     br := bufio.NewReader(r)
  14 
  15     a, err := br.ReadByte()
  16     if err == io.EOF {
  17         return nil
  18     }
  19     if err != nil {
  20         return err
  21     }
  22 
  23     b, err := br.ReadByte()
  24     if err == io.EOF {
  25         w.WriteByte(a)
  26         return nil
  27     }
  28     if err != nil {
  29         return err
  30     }
  31 
  32     // handle potential leading UTF-8 BOM
  33     if a == 0xEF && b == 0xBB {
  34         c, err := br.ReadByte()
  35         if err == io.EOF {
  36             w.WriteByte(a)
  37             w.WriteByte(b)
  38             return nil
  39         }
  40 
  41         if err != nil {
  42             return err
  43         }
  44 
  45         if c != 0xBF {
  46             w.WriteByte(a)
  47             w.WriteByte(b)
  48             w.WriteByte(c)
  49         }
  50 
  51         _, err = io.Copy(w, br)
  52         return adaptWriteError(err)
  53     }
  54 
  55     // handle leading UTF-16 big-endian BOM
  56     if a == 0xFE && b == 0xFF {
  57         return deUTF16(w, br, readBytePairBE)
  58     }
  59 
  60     // handle leading UTF-16 little-endian BOM
  61     if a == 0xFF && b == 0xFE {
  62         return deUTF16(w, br, readBytePairLE)
  63     }
  64 
  65     // handle lack of leading UTF-16 BOM
  66     sym := rune(256*int(b) + int(a))
  67 
  68     if utf16.IsSurrogate(sym) {
  69         a, b, err := readBytePairLE(br)
  70         if err == io.EOF {
  71             return nil
  72         }
  73         if err != nil {
  74             return err
  75         }
  76 
  77         next := rune(256*int(a) + int(b))
  78         sym = utf16.DecodeRune(sym, next)
  79     }
  80 
  81     w.WriteRune(sym)
  82     return deUTF16(w, br, readBytePairLE)
  83 }
  84 
  85 // deUTF16 is used by func toUTF8
  86 func deUTF16(w *bufio.Writer, br *bufio.Reader, readPair readPairFunc) error {
  87     for {
  88         a, b, err := readPair(br)
  89         if err == io.EOF {
  90             return nil
  91         }
  92         if err != nil {
  93             return err
  94         }
  95 
  96         r := rune(256*int(a) + int(b))
  97         if utf16.IsSurrogate(r) {
  98             a, b, err := readPair(br)
  99             if err == io.EOF {
 100                 return nil
 101             }
 102             if err != nil {
 103                 return err
 104             }
 105 
 106             next := rune(256*int(a) + int(b))
 107             r = utf16.DecodeRune(r, next)
 108         }
 109 
 110         _, err = w.WriteRune(r)
 111         err = adaptWriteError(err)
 112         if err != nil {
 113             return err
 114         }
 115     }
 116 }