/* The MIT License (MIT) Copyright (c) 2026 pacman64 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* To compile a smaller-sized command-line app, you can use the `go` command as follows: go build -ldflags "-s -w" -trimpath easybox.go */ package main import ( "bufio" "bytes" "encoding/base64" "encoding/binary" "encoding/json" "errors" "fmt" "io" "math" "os" "regexp" "sort" "strconv" "strings" "unicode" "unicode/utf16" "unicode/utf8" ) const easyboxInfo = ` easybox [tool...] [options...] [arguments...] This is a busybox-style command-line app, with several simple (easy) tools in it. Running this tool without a tool name shows this help message, along with all tool names and aliases available. 
All (optional) leading options start with either single or double-dash: -h, -help show this help message -list show all tools available ` var mains = map[string]func(){ `avoid`: avoidMain, `bytedump`: bytedumpMain, `catl`: catlMain, `coma`: comaMain, `datauri`: datauriMain, `debase64`: debase64Main, `dedup`: dedupMain, `dejsonl`: dejsonlMain, `dessv`: dessvMain, `erase`: eraseMain, `fixlines`: fixlinesMain, `hima`: himaMain, `json0`: json0Main, `json2`: json2Main, `jsonl`: jsonlMain, `jsons`: jsonsMain, `match`: matchMain, `ncol`: ncolMain, `njson`: njsonMain, `nn`: nnMain, `plain`: plainMain, `primes`: primesMain, `realign`: realignMain, `squeeze`: squeezeMain, `tcatl`: tcatlMain, `utfate`: utfateMain, } var toolAliases = map[string]string{ `deduplicate`: `dedup`, `detrail`: `fixlines`, `entab`: `dessv`, `entabulate`: `dessv`, `j0`: `json0`, `j2`: `json2`, `jl`: `jsonl`, `jsonlines`: `jsonl`, `ndjson`: `jsonl`, `nicej`: `njson`, `nicejson`: `njson`, `nicenum`: `nn`, `nicenums`: `nn`, `nj`: `njson`, `nicenumbers`: `nn`, `unique`: `dedup`, `utf8`: `utfate`, } // errNoMoreOutput is a dummy error whose message is ignored, and which // causes the app to quit immediately and successfully var errNoMoreOutput = errors.New(`no more output`) const ( gb = 1024 * 1024 * 1024 bufSize = 32 * 1024 maxLineBufSize = 8 * gb stdinDisplayName = `` ) func main() { if len(os.Args) > 1 { switch os.Args[1] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(easyboxInfo[1:]) return case `-list`, `--list`: easyboxList(os.Stdout) return } } // skip past all folder names, if present tool := os.Args[0] if i := strings.LastIndexByte(tool, '/'); i >= 0 { tool = tool[i+1:] } // if not called from a link, make the tool the first cmd-line argument switch tool { case `easybox`, `eb`, `ebox`: if len(os.Args) == 1 { easyboxHelp(os.Stderr) os.Exit(1) } os.Args = os.Args[1:] // skip past all folder names, if present tool = os.Args[0] if i := strings.LastIndexByte(tool, '/'); i >= 0 { tool = 
tool[i+1:] } } // ignore all dashes and/or underscores in tool names tool = strings.Replace(tool, `-`, ``, -1) tool = strings.Replace(tool, `_`, ``, -1) if tool == `help` { easyboxHelp(os.Stdout) return } // dealias tool name if name, ok := toolAliases[tool]; ok { tool = name } main, ok := mains[tool] if !ok { os.Stderr.WriteString(`easybox: tool named '`) os.Stderr.WriteString(tool) os.Stderr.WriteString("' not found\n") os.Stderr.WriteString("hint: try using the 'help' tool\n") os.Exit(1) } main() } func easyboxHelp(w io.Writer) { n := len(mains) if n < len(toolAliases) { n = len(toolAliases) } sortedKeys := make([]string, 0, n) for k := range mains { sortedKeys = append(sortedKeys, k) } sort.Strings(sortedKeys) io.WriteString(w, easyboxInfo[1:]) io.WriteString(w, "\nTools Available\n\n") for _, k := range sortedKeys { io.WriteString(w, ` `) io.WriteString(w, k) io.WriteString(w, "\n") } n = 0 sortedKeys = sortedKeys[:0] for k := range toolAliases { if n < len(k) { n = len(k) } sortedKeys = append(sortedKeys, k) } sort.Strings(sortedKeys) io.WriteString(w, "\nAliases Available\n\n") for _, k := range sortedKeys { fmt.Fprintf(w, " %-*s -> %s\n", n, k, toolAliases[k]) } } func easyboxList(w io.Writer) { sortedKeys := make([]string, 0, len(mains)) for k := range mains { sortedKeys = append(sortedKeys, k) } sort.Strings(sortedKeys) for _, k := range sortedKeys { io.WriteString(w, k) io.WriteString(w, "\n") } } type easyboxRunner func(bw *bufio.Writer, r io.Reader, name string) error func easyboxRun(args []string, run easyboxRunner) { dashes := 0 for _, name := range args { if name == `-` { dashes++ } // only need to tell whether more than 1 dash was given if dashes > 1 { break } } bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() if len(args) == 0 { if err := run(bw, os.Stdin, stdinDisplayName); err != nil { handleError(bw, err) } return } gotStdin := false multipleDashes := dashes > 1 var stdin []byte for _, name := range args { // allow re-reading 
stdin more than once if name == `-` { var r io.Reader = os.Stdin if multipleDashes && !gotStdin { gotStdin = true stdin, _ = io.ReadAll(r) } if gotStdin { r = bytes.NewReader(stdin) } if err := run(bw, r, stdinDisplayName); err != nil { handleError(bw, err) return } continue } if err := easyboxHandleFile(bw, name, run); err != nil { handleError(bw, err) return } } } func easyboxHandleFile(w *bufio.Writer, name string, run easyboxRunner) error { if name == `` || name == `-` { return run(w, os.Stdin, stdinDisplayName) } f, err := os.Open(name) if err != nil { return errors.New(`can't read from file named "` + name + `"`) } defer f.Close() return run(w, f, name) } // countDecimals counts decimal digits from the string given, assuming it // represents a valid/useable float64, when parsed func countDecimals(s string) int { dot := strings.IndexByte(s, '.') if dot < 0 { return 0 } decs := 0 s = s[dot+1:] for len(s) > 0 { s = skipLeadingEscapeSequences(s) if len(s) == 0 { break } if '0' <= s[0] && s[0] <= '9' { decs++ } s = s[1:] } return decs } // countDotDecimals is like func countDecimals, but this one also includes // the dot, when any decimals are present, else the count stays at 0 func countDotDecimals(s string) int { decs := countDecimals(s) if decs > 0 { return decs + 1 } return decs } func countWidth(s string) int { width := 0 for len(s) > 0 { i := indexStartANSI(s) if i < 0 { width += utf8.RuneCountInString(s) return width } width += utf8.RuneCountInString(s[:i]) for len(s) > 0 { upper := s[0] &^ 32 s = s[1:] if 'A' <= upper && upper <= 'Z' { break } } } return width } func handleError(w *bufio.Writer, err error) { if err == nil { return } if err == errNoMoreOutput { // os.Exit(141) return } if w != nil { w.Flush() } os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") os.Exit(1) } // hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix func hasPrefixByte(b []byte, prefix byte) bool { return len(b) > 0 && b[0] == prefix } // hasPrefixFold 
// hasPrefixBOM checks if a byte-slice starts with a UTF-8 BOM sequence
func hasPrefixBOM(s []byte) bool {
	return len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf
}

// indexDigit finds the index of the first digit in a string, or -1 when the
// string has no decimal digits
func indexDigit(s string) int {
	for i := 0; i < len(s); i++ {
		if '0' <= s[i] && s[i] <= '9' {
			return i
		}
	}
	// empty slice, or a slice without any digits
	return -1
}

// indexNonDigit finds the index of the first non-digit in a string, or -1
// when the string is all decimal digits
func indexNonDigit(s string) int {
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return i
		}
	}
	// empty slice, or a slice which only has digits
	return -1
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices which can be independently negative when either the start/end of
// a sequence isn't found; given their fairly-common use, even the hyperlink
// ESC]8 sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte
	for i, b := range s {
		switch {
		case prev == '\x1b' && b == '[':
			// CSI sequence: ends at the first ASCII letter
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i, -1
			}
			return i - 1, i + 1 + j + 1
		case prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8':
			// OSC 8 hyperlink sequence: ends with an ESC\ pair
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i, -1
			}
			return i - 1, i + 1 + j + 2
		}
		prev = b
	}
	return -1, -1
}

// indexLetter finds the first ASCII letter (either case), or -1 when absent
func indexLetter(s []byte) int {
	for i, b := range s {
		if upper := b &^ 32; 'A' <= upper && upper <= 'Z' {
			return i
		}
	}
	return -1
}

// indexPair finds where the 2 adjacent bytes given first occur, or -1
func indexPair(s []byte, x byte, y byte) int {
	var prev byte
	for i, b := range s {
		if prev == x && b == y {
			return i
		}
		prev = b
	}
	return -1
}

// indexStartANSI finds the start of the first ESC[ pair in a string, or -1
func indexStartANSI(s string) int {
	var prev byte
	for i := 0; i < len(s); i++ {
		if prev == '\x1b' && s[i] == '[' {
			return i - 1
		}
		prev = s[i]
	}
	return -1
}
indexStartANSI(s string) int { var prev byte for i := range s { b := s[i] if prev == '\x1b' && b == '[' { return i - 1 } prev = b } return -1 } func loopTSV(line string, f func(i int, s string)) { for i := 0; len(line) > 0; i++ { pos := strings.IndexByte(line, '\t') if pos < 0 { f(i, line) return } f(i, line[:pos]) line = line[pos+1:] } } func match(what []byte, with []*regexp.Regexp) bool { for _, e := range with { if e.Match(what) { return true } } return false } func seemsDataURI(s string) bool { start := s if len(s) > 64 { start = s[:64] } return strings.HasPrefix(s, `data:`) && strings.Contains(start, `;base64,`) } // skip ignores n bytes from the reader given func skip(r io.Reader, n int) { if n < 1 { return } // use func Seek for input files, except for stdin, which you can't seek if f, ok := r.(*os.File); ok && r != os.Stdin { f.Seek(int64(n), io.SeekCurrent) return } io.CopyN(io.Discard, r, int64(n)) } func skipLeadingEscapeSequences(s string) string { for len(s) >= 2 { if s[0] != '\x1b' { return s } switch s[1] { case '[': s = skipSingleLeadingANSI(s[2:]) case ']': if len(s) < 3 || s[2] != '8' { return s } s = skipSingleLeadingOSC(s[3:]) default: return s } } return s } func skipSingleLeadingANSI(s string) string { for len(s) > 0 { upper := s[0] &^ 32 s = s[1:] if 'A' <= upper && upper <= 'Z' { break } } return s } func skipSingleLeadingOSC(s string) string { var prev byte for len(s) > 0 { b := s[0] s = s[1:] if prev == '\x1b' && b == '\\' { break } prev = b } return s } func trimLeadingSpaces(s string) string { for len(s) > 0 && s[0] == ' ' { s = s[1:] } return s } // trimLeadingWhitespace ignores leading space-like symbols: this is useful // to handle text-based data formats more flexibly func trimLeadingWhitespace(b []byte) []byte { for len(b) > 0 { switch b[0] { case ' ', '\t', '\n', '\r': b = b[1:] default: return b } } // an empty slice is all that's left, at this point return nil } func trimTrailingSpaces(s string) string { for len(s) > 0 && 
s[len(s)-1] == ' ' { s = s[:len(s)-1] } return s } // writeSpaces minimizes calls to write-like funcs func writeSpaces(w *bufio.Writer, n int) { const spaces = ` ` if n < 1 { return } for n >= len(spaces) { w.WriteString(spaces) n -= len(spaces) } w.WriteString(spaces[:n]) } func writeTabs(w *bufio.Writer, n int) { for n > 0 { w.WriteByte('\t') n-- } } const avoidInfo = ` avoid [options...] [regular expressions...] Avoid/ignore lines which match any of the extended-mode regular expressions given. When not given any regex, all empty lines are ignored by default. The options are, available both in single and double-dash versions -h, -help show this help message -i, -ins match regexes case-insensitively ` func avoidMain() { nerr := 0 buffered := false sensitive := true args := os.Args[1:] out: for len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-i`, `--i`, `-ins`, `--ins`: sensitive = false args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(avoidInfo[1:]) return default: break out } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } if len(args) == 0 { args = []string{`^$`} } exprs := make([]*regexp.Regexp, 0, len(args)) for _, src := range args { var err error var exp *regexp.Regexp if !sensitive { exp, err = regexp.Compile(`(?i)` + src) } else { exp, err = regexp.Compile(src) } if err != nil { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") nerr++ } exprs = append(exprs, exp) } if nerr > 0 { os.Exit(1) } sc := bufio.NewScanner(os.Stdin) sc.Buffer(nil, maxLineBufSize) bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && hasPrefixBOM(s) { s = s[3:] } if !match(s, exprs) { bw.Write(s) bw.WriteByte('\n') if !liveLines { continue } if err := bw.Flush(); err != nil { return 
const bytedumpInfo = `
bytedump [options...] [filenames...]

Show bytes as hexadecimal and ascii on the side. Each line shows the
starting offset for the bytes shown, 16 of the bytes themselves in base-16
notation, and any ASCII codes when the byte values are in the typical ASCII
range. The offsets shown are base-10.
`

// bytedumpChunkSize is how many input bytes each output line covers
const bytedumpChunkSize = 16

// bytedumpHexSymbols is a direct lookup table combining 2 hex digits with
// either a space or a displayable ASCII symbol matching the byte's own ASCII
// value; this table used to be a 256-entry literal autogenerated by running
// the command
//
//	seq 0 255 | ./hex-symbols.awk
//
// and is now computed once on startup instead, which avoids keeping 256
// hand-written entries in sync with the rendering rule
var bytedumpHexSymbols = makeBytedumpHexSymbols()

// makeBytedumpHexSymbols builds the 256-entry lookup table: each entry is
// the byte's 2 lowercase hex digits, followed by the byte's own symbol when
// it's visible ASCII (33..126), or a space otherwise
func makeBytedumpHexSymbols() (table [256]string) {
	const hexDigits = `0123456789abcdef`
	for i := 0; i < len(table); i++ {
		sym := byte(' ')
		if 32 < i && i < 127 {
			sym = byte(i)
		}
		table[i] = string([]byte{hexDigits[i>>4], hexDigits[i&0x0f], sym})
	}
	return table
}
`c1 `, `c2 `, `c3 `, `c4 `, `c5 `, `c6 `, `c7 `, `c8 `, `c9 `, `ca `, `cb `, `cc `, `cd `, `ce `, `cf `, `d0 `, `d1 `, `d2 `, `d3 `, `d4 `, `d5 `, `d6 `, `d7 `, `d8 `, `d9 `, `da `, `db `, `dc `, `dd `, `de `, `df `, `e0 `, `e1 `, `e2 `, `e3 `, `e4 `, `e5 `, `e6 `, `e7 `, `e8 `, `e9 `, `ea `, `eb `, `ec `, `ed `, `ee `, `ef `, `f0 `, `f1 `, `f2 `, `f3 `, `f4 `, `f5 `, `f6 `, `f7 `, `f8 `, `f9 `, `fa `, `fb `, `fc `, `fd `, `fe `, `ff `, } func bytedumpMain() { args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(bytedumpInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } handleError(nil, bytedumpRun(args)) } func bytedumpRun(args []string) error { w := bufio.NewWriterSize(os.Stdout, 32*1024) defer w.Flush() // with no filenames given, handle stdin and quit if len(args) == 0 { return bytedump(w, os.Stdin, stdinDisplayName, -1) } for i, fname := range args { if i > 0 { w.WriteString("\n") w.WriteString("\n") } if err := bytedumpHandleFile(w, fname); err != nil { return err } } return nil } func bytedumpHandleFile(w *bufio.Writer, fname string) error { f, err := os.Open(fname) if err != nil { return err } defer f.Close() stat, err := f.Stat() if err != nil { return bytedump(w, f, fname, -1) } fsize := int(stat.Size()) return bytedump(w, f, fname, fsize) } // bytedump shows some messages related to the input and the cmd-line options // used, and then follows them by the hexadecimal byte-view func bytedump(w *bufio.Writer, r io.Reader, name string, size int) error { owidth := -1 if size > 0 { w := math.Log10(float64(size)) w = math.Max(math.Ceil(w), 1) owidth = int(w) } if owidth < 0 { owidth = 8 } rc := bytedumpConfig{ out: w, offsetWidth: owidth, } if size < 0 { fmt.Fprintf(w, "• %s\n", name) } else { const fs = "• %s (%s bytes)\n" fmt.Fprintf(w, fs, name, sprintCommas(size)) } w.WriteByte('\n') // when done, emit a new line in case only part of the last line is // shown, which 
means no newline was emitted for it defer w.WriteString("\n") // calling func Read directly can sometimes result in chunks shorter // than the max chunk-size, even when there are plenty of bytes yet // to read; to avoid that, use a buffered-reader to explicitly fill // a slice instead br := bufio.NewReader(r) // to show ASCII up to 1 full chunk ahead, 2 chunks are needed cur := make([]byte, 0, bytedumpChunkSize) ahead := make([]byte, 0, bytedumpChunkSize) // the ASCII-panel's wide output requires staying 1 step/chunk behind, // so to speak cur, err := bytedumpFillChunk(cur[:0], bytedumpChunkSize, br) if len(cur) == 0 { if err == io.EOF { err = nil } return err } for { ahead, err := bytedumpFillChunk(ahead[:0], bytedumpChunkSize, br) if err != nil && err != io.EOF { return err } if len(ahead) == 0 { // done, maybe except for an extra line of output break } // show the byte-chunk on its own output line if err := bytedumpWriteChunk(rc, cur, ahead); err != nil { return errNoMoreOutput } rc.chunks++ rc.offset += uint(len(cur)) cur = cur[:copy(cur, ahead)] } // don't forget the last output line if rc.chunks > 0 && len(cur) > 0 { return bytedumpWriteChunk(rc, cur, nil) } return nil } // bytedumpFillChunk tries to read the number of bytes given, appending them // to the byte-slice given; this func returns an EOF error only when no bytes // are read, which somewhat simplifies error-handling for the func caller func bytedumpFillChunk(chunk []byte, n int, br *bufio.Reader) ([]byte, error) { // read buffered-bytes up to the max chunk-size for i := 0; i < n; i++ { b, err := br.ReadByte() if err == nil { chunk = append(chunk, b) continue } if err == io.EOF && i > 0 { return chunk, nil } return chunk, err } // got the full byte-count asked for return chunk, nil } // bytedumpConfig groups several arguments given to any of the rendering funcs type bytedumpConfig struct { // out is writer to send all output to out *bufio.Writer // offset is the byte-offset of the first byte shown on 
the current output // line: if shown at all, it's shown at the start the line offset uint // chunks is the 0-based counter for byte-chunks/lines shown so far, which // indirectly keeps track of when it's time to show a `breather` line chunks uint // perLine is how many hex-encoded bytes are shown per line perLine uint // offsetWidth is the max string-width for the byte-offsets shown at the // start of output lines, and determines those values' left-padding offsetWidth int } // loopThousandsGroups comes from my lib/package `mathplus`: that's why it // handles negatives, even though this app only uses it with non-negatives. func loopThousandsGroups(n int, fn func(i, n int)) { // 0 doesn't have a log10 if n == 0 { fn(0, 0) return } sign := +1 if n < 0 { n = -n sign = -1 } intLog1000 := int(math.Log10(float64(n)) / 3) remBase := int(math.Pow10(3 * intLog1000)) for i := 0; remBase > 0; i++ { group := (1000 * n) / remBase / 1000 fn(i, sign*group) // if original number was negative, ensure only first // group gives a negative input to the callback sign = +1 n %= remBase remBase /= 1000 } } // sprintCommas turns the non-negative number given into a readable string, // where digits are grouped-separated by commas func sprintCommas(n int) string { var sb strings.Builder loopThousandsGroups(n, func(i, n int) { if i == 0 { var buf [4]byte sb.Write(strconv.AppendInt(buf[:0], int64(n), 10)) return } sb.WriteByte(',') writePad0Sub1000Counter(&sb, uint(n)) }) return sb.String() } // writePad0Sub1000Counter is an alternative to fmt.Fprintf(w, `%03d`, n) func writePad0Sub1000Counter(w io.Writer, n uint) { // precondition is 0...999 if n > 999 { w.Write([]byte(`???`)) return } var buf [3]byte buf[0] = byte(n/100) + '0' n %= 100 buf[1] = byte(n/10) + '0' buf[2] = byte(n%10) + '0' w.Write(buf[:]) } // bytedumpWriteHex is faster than calling fmt.Fprintf(w, `%02x`, b): this // matters when handling megabytes, as it's called for every byte of input func bytedumpWriteHex(w *bufio.Writer, b 
byte) { const hexDigits = `0123456789abcdef` w.WriteByte(hexDigits[b>>4]) w.WriteByte(hexDigits[b&0x0f]) } // bytedumpPadding is the padding/spacing emitted across each output line const bytedumpPadding = 2 func bytedumpWriteChunk(cfg bytedumpConfig, first, second []byte) error { w := cfg.out // start each line with the byte-offset for the 1st item shown on it writeCounter(w, cfg.offsetWidth, cfg.offset) w.WriteByte(' ') for _, b := range first { // fmt.Fprintf(w, ` %02x`, b) // // the commented part above was a performance bottleneck, since // the slow/generic fmt.Fprintf was called for each input byte w.WriteByte(' ') bytedumpWriteHex(w, b) } bytedumpWriteASCII(w, first, second, bytedumpChunkSize) return w.WriteByte('\n') } // writeCounter just emits a left-padded number func writeCounter(w *bufio.Writer, width int, n uint) { var buf [32]byte str := strconv.AppendUint(buf[:0], uint64(n), 10) writeSpaces(w, width-len(str)) w.Write(str) } // bytedumpWriteASCII emits the side-panel showing all ASCII runs for each line func bytedumpWriteASCII(w *bufio.Writer, first, second []byte, width int) { // prev keeps track of the previous byte, so spaces are added // when bytes change from non-visible-ASCII to visible-ASCII var prev byte spaces := 3*(width-len(first)) + bytedumpPadding for _, b := range first { if 32 < b && b < 127 { if !(32 < prev && prev < 127) { writeSpaces(w, spaces) spaces = 1 } w.WriteByte(b) } prev = b } for _, b := range second { if 32 < b && b < 127 { if !(32 < prev && prev < 127) { writeSpaces(w, spaces) spaces = 1 } w.WriteByte(b) } prev = b } } const catlInfo = ` catl [options...] [file...] Unlike "cat", conCATenate Lines ensures lines across inputs are never joined by accident, when an input's last line doesn't end with a line-feed. Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line feeds. Leading BOM (byte-order marks) on first lines are also ignored. 
All (optional) leading options start with either single or double-dash: -h show this help message -help show this help message ` func catlMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(catlInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return catl(w, r, liveLines) }) } func catl(w *bufio.Writer, r io.Reader, live bool) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && hasPrefixBOM(s) { s = s[3:] } w.Write(s) if w.WriteByte('\n') != nil { return errNoMoreOutput } if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } const comaInfo = ` coma [options...] [regexes/style pairs...] COlor MAtches ANSI-styles matching regular expressions along lines read from the standard input. The regular-expression mode used is "re2", which is a superset of the commonly-used "extended-mode". Regexes always avoid matching any ANSI-style sequences, to avoid messing those up. Also, multiple matches in a line never overlap: at each step along a line, the earliest-starting match among the regexes always wins, as the order regexes are given among the arguments never matters. 
// styleAliases maps alternative/shorthand names to the canonical style
// names in the styles map
var styleAliases = map[string]string{
	// single-letter shorthands
	`b`: `blue`,
	`g`: `green`,
	`m`: `magenta`,
	`o`: `orange`,
	`p`: `purple`,
	`r`: `red`,
	`u`: `underline`,

	// two-letter background shorthands, in both letter-orders
	`bb`: `blueback`,
	`bg`: `greenback`,
	`bm`: `magentaback`,
	`bo`: `orangeback`,
	`bp`: `purpleback`,
	`br`: `redback`,
	`gb`: `greenback`,
	`mb`: `magentaback`,
	`ob`: `orangeback`,
	`pb`: `purpleback`,
	`rb`: `redback`,

	`hi`:  `inverse`,
	`inv`: `inverse`,
	`mag`: `magenta`,

	`du`:   `doubleunderline`,
	`flip`: `inverse`,
	`swap`: `inverse`,

	`reset`: `plain`,

	`highlight`: `inverse`,
	`hilite`:    `inverse`,
	`invert`:    `inverse`,
	`inverted`:  `inverse`,
	`swapped`:   `inverse`,

	`dunderline`:  `doubleunderline`,
	`dunderlined`: `doubleunderline`,

	`strikethrough`: `strike`,
	`strikethru`:    `strike`,
	`struck`:        `strike`,
	`underlined`:    `underline`,

	// various background-color spellings
	`bblue`:    `blueback`,
	`bgray`:    `grayback`,
	`bgreen`:   `greenback`,
	`bmagenta`: `magentaback`,
	`borange`:  `orangeback`,
	`bpurple`:  `purpleback`,
	`bred`:     `redback`,

	`bgblue`:    `blueback`,
	`bggray`:    `grayback`,
	`bggreen`:   `greenback`,
	`bgmag`:     `magentaback`,
	`bgmagenta`: `magentaback`,
	`bgorange`:  `orangeback`,
	`bgpurple`:  `purpleback`,
	`bgred`:     `redback`,

	`bluebg`:    `blueback`,
	`graybg`:    `grayback`,
	`greenbg`:   `greenback`,
	`magbg`:     `magentaback`,
	`magentabg`: `magentaback`,
	`orangebg`:  `orangeback`,
	`purplebg`:  `purpleback`,
	`redbg`:     `redback`,

	`backblue`:    `blueback`,
	`backgray`:    `grayback`,
	`backgreen`:   `greenback`,
	`backmag`:     `magentaback`,
	`backmagenta`: `magentaback`,
	`backorange`:  `orangeback`,
	`backpurple`:  `purpleback`,
	`backred`:     `redback`,
}

// styles maps canonical style names to the ANSI sequences which start them
var styles = map[string]string{
	`blue`:            "\x1b[38;2;0;95;215m",
	`bold`:            "\x1b[1m",
	`doubleunderline`: "\x1b[21m",
	`gray`:            "\x1b[38;2;168;168;168m",
	`green`:           "\x1b[38;2;0;135;95m",
	`inverse`:         "\x1b[7m",
	`magenta`:         "\x1b[38;2;215;0;255m",
	`orange`:          "\x1b[38;2;215;95;0m",
	`plain`:           "\x1b[0m",
	`purple`:          "\x1b[38;2;135;95;255m",
	`red`:             "\x1b[38;2;204;0;0m",
	`strike`:          "\x1b[9m",
	`underline`:       "\x1b[4m",

	// background styles also set a near-white foreground, for legibility
	`blueback`:    "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
	`grayback`:    "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
	`greenback`:   "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
	`magentaback`: "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
	`orangeback`:  "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
	`purpleback`:  "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
	`redback`:     "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
}
"\x1b[0m", `purple`: "\x1b[38;2;135;95;255m", `red`: "\x1b[38;2;204;0;0m", `strike`: "\x1b[9m", `underline`: "\x1b[4m", `blueback`: "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m", `grayback`: "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m", `greenback`: "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m", `magentaback`: "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m", `orangeback`: "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m", `purpleback`: "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m", `redback`: "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m", } type patternStylePair struct { expr *regexp.Regexp style string } func comaMain() { buffered := false insensitive := false args := os.Args[1:] out: for len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] continue case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(comaInfo[1:]) return case `-i`, `--i`, `-ins`, `--ins`: insensitive = true args = args[1:] continue default: break out } } if len(args) > 0 && args[0] == `--` { args = args[1:] } if len(args)%2 != 0 { const msg = "you forgot the style-name for/after the last regex\n" os.Stderr.WriteString(msg) os.Exit(1) } nerr := 0 pairs := make([]patternStylePair, 0, len(args)/2) for len(args) >= 2 { src := args[0] sname := args[1] var err error var exp *regexp.Regexp if insensitive { exp, err = regexp.Compile(`(?i)` + src) } else { exp, err = regexp.Compile(src) } if err != nil { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") nerr++ } if alias, ok := styleAliases[sname]; ok { sname = alias } style, ok := styles[sname] if !ok { os.Stderr.WriteString("no style named `") os.Stderr.WriteString(args[1]) os.Stderr.WriteString("`\n") nerr++ } pairs = append(pairs, patternStylePair{expr: exp, style: style}) args = args[2:] } if nerr > 0 { os.Exit(1) } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } sc := bufio.NewScanner(os.Stdin) sc.Buffer(nil, 
maxLineBufSize) bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && hasPrefixBOM(s) { s = s[3:] } comaHandleLine(bw, s, pairs) if err := bw.WriteByte('\n'); err != nil { return } if !liveLines { continue } if err := bw.Flush(); err != nil { return } } } func comaHandleLine(w *bufio.Writer, s []byte, with []patternStylePair) { for len(s) > 0 { i, j := indexEscapeSequence(s) if i < 0 { comaHandleLineChunk(w, s, with) return } comaHandleLineChunk(w, s[:i], with) w.Write(s[i:j]) if j < 0 { break } s = s[j:] } } func comaHandleLineChunk(w *bufio.Writer, s []byte, with []patternStylePair) { start := -1 end := -1 which := -1 for len(s) > 0 { start = -1 for i, pair := range with { span := pair.expr.FindIndex(s) // also ignore empty regex matches to avoid infinite outer loops, // as skipping empty slices isn't advancing at all, leaving the // string stuck to being empty-matched forever by the same regex if span == nil || span[0] == span[1] { continue } if span[0] < start || start < 0 { start = span[0] end = span[1] which = i } } if start < 0 { w.Write(s) return } w.Write(s[:start]) w.WriteString(with[which].style) w.Write(s[start:end]) w.WriteString("\x1b[0m") s = s[end:] } } const datauriInfo = ` datauri [options...] [filenames...] Encode bytes as data-URIs, auto-detecting the file/data type using the first few bytes from each data/file stream. When given multiple inputs, the output will be multiple lines, one for each file given. Empty files/inputs result in empty lines. A simple dash (-) stands for the standard-input, which is also used automatically when not given any files. Data-URIs are base64-encoded text representations of arbitrary data, which include their payload's MIME-type, and which are directly useable/shareable in web-browsers as links, despite not looking like normal links/URIs. Some web-browsers limit the size of handled data-URIs to tens of kilobytes. 
Options -h, -help, --h, --help show this help message ` func datauriMain() { args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(datauriInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return dataURI(w, r, name) }) } func dataURI(w *bufio.Writer, r io.Reader, name string) error { var buf [64]byte n, err := r.Read(buf[:]) if err != nil && err != io.EOF { return err } start := buf[:n] // handle regular data, trying to auto-detect its MIME type using // its first few bytes mime, ok := detectMIME(start) if !ok { return errors.New(name + `: unknown file type`) } w.WriteString(`data:`) w.WriteString(mime) w.WriteString(`;base64,`) r = io.MultiReader(bytes.NewReader(start), r) enc := base64.NewEncoder(base64.StdEncoding, w) if _, err := io.Copy(enc, r); err != nil { return err } enc.Close() w.WriteByte('\n') if err := w.Flush(); err != nil { return errNoMoreOutput } return nil } // makeDotless is similar to filepath.Ext, except its results never start // with a dot func makeDotless(s string) string { i := strings.LastIndexByte(s, '.') if i >= 0 { return s[(i + 1):] } return s } // nameToMIME tries to match a MIME type to a filename, dotted file extension, // or a dot-less filetype/extension given func nameToMIME(fname string) (mimeType string, ok bool) { // handle dotless file types and filenames alike kind, ok := type2mime[makeDotless(fname)] return kind, ok } // detectMIME guesses the first appropriate MIME type from the first few // data bytes given: 24 bytes are enough to detect all supported types func detectMIME(b []byte) (mimeType string, ok bool) { t, ok := detectType(b) if ok { return t, true } return ``, false } // detectType guesses the first appropriate file type for the data given: // here the type is a a filename extension without the leading dot func detectType(b []byte) (dotlessExt string, ok 
bool) { // empty data, so there's no way to detect anything if len(b) == 0 { return ``, false } // check for plain-text web-document formats case-insensitively kind, ok := checkDoc(b) if ok { return kind, true } // check data formats which allow any byte at the start kind, ok = checkSpecial(b) if ok { return kind, true } // check all other supported data formats headers := hdrDispatch[b[0]] for _, t := range headers { if hasPrefixPattern(b[1:], t.Header[1:], cba) { return t.Type, true } } // unrecognized data format return ``, false } // checkDoc tries to guess if the bytes given are the start of HTML, SVG, // XML, or JSON data func checkDoc(b []byte) (kind string, ok bool) { // ignore leading whitespaces b = trimLeadingWhitespace(b) // can't detect anything with empty data if len(b) == 0 { return ``, false } // handle XHTML documents which don't start with a doctype declaration if bytes.Contains(b, doctypeHTML) { return html, true } // handle HTML/SVG/XML documents if hasPrefixByte(b, '<') { if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) { if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) { return svg, true } return xml, true } headers := hdrDispatch['<'] for _, v := range headers { if hasPrefixFold(b, v.Header) { return v.Type, true } } return ``, false } // handle JSON with top-level arrays if hasPrefixByte(b, '[') { // match [", or [[, or [{, ignoring spaces between b = trimLeadingWhitespace(b[1:]) if len(b) > 0 { switch b[0] { case '"', '[', '{': return json_, true } } return ``, false } // handle JSON with top-level objects if hasPrefixByte(b, '{') { // match {", ignoring spaces between: after {, the only valid syntax // which can follow is the opening quote for the expected object-key b = trimLeadingWhitespace(b[1:]) if hasPrefixByte(b, '"') { return json_, true } return ``, false } // checking for a quoted string, any of the JSON keywords, or even a // number seems too ambiguous to declare the data valid JSON // no web-document format detected return 
``, false } // checkSpecial handles special file-format headers, which should be checked // before the normal file-type headers, since the first-byte dispatch algo // doesn't work for these func checkSpecial(b []byte) (kind string, ok bool) { if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 { for _, t := range specialHeaders { if hasPrefixPattern(b[4:], t.Header[4:], cba) { return t.Type, true } } } return ``, false } // hasPrefixPattern works like bytes.HasPrefix, except it allows for a special // value to signal any byte is allowed on specific spots func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool { // if the data are shorter than the pattern to match, there's no match if len(what) < len(pat) { return false } // use a slice which ensures the pattern length is never exceeded what = what[:len(pat)] for i, x := range what { y := pat[i] if x != y && y != wildcard { return false } } return true } // all the MIME types used/recognized in this package const ( aiff = `audio/aiff` au = `audio/basic` avi = `video/avi` avif = `image/avif` bmp = `image/x-bmp` caf = `audio/x-caf` cur = `image/vnd.microsoft.icon` css = `text/css` csv = `text/csv` djvu = `image/x-djvu` elf = `application/x-elf` exe = `application/vnd.microsoft.portable-executable` flac = `audio/x-flac` gif = `image/gif` gz = `application/gzip` heic = `image/heic` htm = `text/html` html = `text/html` ico = `image/x-icon` iso = `application/octet-stream` jpg = `image/jpeg` jpeg = `image/jpeg` js = `application/javascript` json_ = `application/json` m4a = `audio/aac` m4v = `video/x-m4v` mid = `audio/midi` mov = `video/quicktime` mp4 = `video/mp4` mp3 = `audio/mpeg` mpg = `video/mpeg` ogg = `audio/ogg` opus = `audio/opus` pdf = `application/pdf` png = `image/png` ps = `application/postscript` psd = `image/vnd.adobe.photoshop` rtf = `application/rtf` sqlite3 = `application/x-sqlite3` svg = `image/svg+xml` text = `text/plain` tiff = `image/tiff` tsv = `text/tsv` wasm = 
`application/wasm` wav = `audio/x-wav` webp = `image/webp` webm = `video/webm` xml = `application/xml` zip = `application/zip` zst = `application/zstd` ) // type2mime turns dotless format-names into MIME types var type2mime = map[string]string{ `aiff`: aiff, `wav`: wav, `avi`: avi, `jpg`: jpg, `jpeg`: jpeg, `m4a`: m4a, `mp4`: mp4, `m4v`: m4v, `mov`: mov, `png`: png, `avif`: avif, `webp`: webp, `gif`: gif, `tiff`: tiff, `psd`: psd, `flac`: flac, `webm`: webm, `mpg`: mpg, `zip`: zip, `gz`: gz, `zst`: zst, `mp3`: mp3, `opus`: opus, `bmp`: bmp, `mid`: mid, `ogg`: ogg, `html`: html, `htm`: htm, `svg`: svg, `xml`: xml, `rtf`: rtf, `pdf`: pdf, `ps`: ps, `au`: au, `ico`: ico, `cur`: cur, `caf`: caf, `heic`: heic, `sqlite3`: sqlite3, `elf`: elf, `exe`: exe, `wasm`: wasm, `iso`: iso, `txt`: text, `css`: css, `csv`: csv, `tsv`: tsv, `js`: js, `json`: json_, `geojson`: json_, } // formatDescriptor ties a file-header pattern to its data-format type type formatDescriptor struct { Header []byte Type string } // can be anything: ensure this value differs from all other literal bytes // in the generic-headers table: failing that, its value could cause subtle // type-misdetection bugs const cba = 0xFD // 253, which is > 127, the highest-valued ascii symbol // dash-streamed m4a format var m4aDash = []byte{ cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', } // format markers with leading wildcards, which should be checked before the // normal ones: this is to prevent mismatches with the latter types, even // though you can make probabilistic arguments which suggest these mismatches // should be very unlikely in practice var specialHeaders = []formatDescriptor{ {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, {[]byte{cba, cba, cba, cba, 'f', 
't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, {m4aDash, m4a}, } // sqlite3 database format var sqlite3db = []byte{ 'S', 'Q', 'L', 'i', 't', 'e', ' ', 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 000, } // windows-variant bitmap file-header, which is followed by a byte-counter for // the 40-byte infoheader which follows that var winbmp = []byte{ 'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40, } // deja-vu document format var djv = []byte{ 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V', } var doctypeHTML = []byte{ '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', } // hdrDispatch groups format-description-groups by their first byte, thus // shortening total lookups for some data header: notice how the `ftyp` data // formats aren't handled here, since these can start with any byte, instead // of the literal value of the any-byte markers they use var hdrDispatch = [256][]formatDescriptor{ { {[]byte{000, 000, 001, 0xBA}, mpg}, {[]byte{000, 000, 001, 0xB3}, mpg}, {[]byte{000, 000, 001, 000}, ico}, {[]byte{000, 000, 002, 000}, cur}, {[]byte{000, 'a', 's', 'm'}, wasm}, }, // 0 nil, // 1 nil, // 2 nil, // 3 nil, // 4 nil, // 5 nil, // 6 nil, // 7 nil, // 8 nil, // 9 nil, // 10 nil, // 11 nil, // 12 nil, // 13 nil, // 14 nil, // 15 nil, // 16 nil, // 17 nil, // 18 nil, // 19 nil, // 20 nil, // 21 nil, // 22 nil, // 23 nil, // 24 nil, // 25 { {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm}, }, // 26 nil, // 27 nil, // 28 nil, // 29 nil, // 30 { // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz}, {[]byte{0x1F, 0x8B, 0x08}, gz}, }, // 31 nil, // 32 nil, // 33 ! 
nil, // 34 " { {[]byte{'#', '!', ' '}, text}, {[]byte{'#', '!', '/'}, text}, }, // 35 # nil, // 36 $ { {[]byte{'%', 'P', 'D', 'F'}, pdf}, {[]byte{'%', '!', 'P', 'S'}, ps}, }, // 37 % nil, // 38 & nil, // 39 ' { {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst}, }, // 40 ( nil, // 41 ) nil, // 42 * nil, // 43 + nil, // 44 , nil, // 45 - { {[]byte{'.', 's', 'n', 'd'}, au}, }, // 46 . nil, // 47 / nil, // 48 0 nil, // 49 1 nil, // 50 2 nil, // 51 3 nil, // 52 4 nil, // 53 5 nil, // 54 6 nil, // 55 7 { {[]byte{'8', 'B', 'P', 'S'}, psd}, }, // 56 8 nil, // 57 9 nil, // 58 : nil, // 59 ; { // func checkDoc is better for these, since it's case-insensitive {doctypeHTML, html}, {[]byte{'<', 's', 'v', 'g'}, svg}, {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html}, {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html}, {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html}, {[]byte{'<', '?', 'x', 'm', 'l'}, xml}, }, // 60 < nil, // 61 = nil, // 62 > nil, // 63 ? nil, // 64 @ { {djv, djvu}, }, // 65 A { {winbmp, bmp}, }, // 66 B nil, // 67 C nil, // 68 D nil, // 69 E { {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, }, // 70 F { {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif}, {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif}, }, // 71 G nil, // 72 H { {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata {[]byte{'I', 'I', '*', 000}, tiff}, }, // 73 I nil, // 74 J nil, // 75 K nil, // 76 L { {[]byte{'M', 'M', 000, '*'}, tiff}, {[]byte{'M', 'T', 'h', 'd'}, mid}, {[]byte{'M', 'Z', cba, 000, cba, 000}, exe}, // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe}, // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe}, // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe}, }, // 77 M nil, // 78 N { {[]byte{'O', 'g', 'g', 'S'}, ogg}, }, // 79 O { {[]byte{'P', 'K', 003, 004}, zip}, }, // 80 P nil, // 81 Q { {[]byte{'R', 'I', 'F', 'F', 
cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, }, // 82 R { {sqlite3db, sqlite3}, }, // 83 S nil, // 84 T nil, // 85 U nil, // 86 V nil, // 87 W nil, // 88 X nil, // 89 Y nil, // 90 Z nil, // 91 [ nil, // 92 \ nil, // 93 ] nil, // 94 ^ nil, // 95 _ nil, // 96 ` nil, // 97 a nil, // 98 b { {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, }, // 99 c nil, // 100 d nil, // 101 e { {[]byte{'f', 'L', 'a', 'C'}, flac}, }, // 102 f nil, // 103 g nil, // 104 h nil, // 105 i nil, // 106 j nil, // 107 k nil, // 108 l nil, // 109 m nil, // 110 n nil, // 111 o nil, // 112 p nil, // 113 q nil, // 114 r nil, // 115 s nil, // 116 t nil, // 117 u nil, // 118 v nil, // 119 w nil, // 120 x nil, // 121 y nil, // 122 z { {[]byte{'{', '\\', 'r', 't', 'f'}, rtf}, }, // 123 { nil, // 124 | nil, // 125 } nil, // 126 { {[]byte{127, 'E', 'L', 'F'}, elf}, }, // 127 nil, // 128 nil, // 129 nil, // 130 nil, // 131 nil, // 132 nil, // 133 nil, // 134 nil, // 135 nil, // 136 { {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, }, // 137 nil, // 138 nil, // 139 nil, // 140 nil, // 141 nil, // 142 nil, // 143 nil, // 144 nil, // 145 nil, // 146 nil, // 147 nil, // 148 nil, // 149 nil, // 150 nil, // 151 nil, // 152 nil, // 153 nil, // 154 nil, // 155 nil, // 156 nil, // 157 nil, // 158 nil, // 159 nil, // 160 nil, // 161 nil, // 162 nil, // 163 nil, // 164 nil, // 165 nil, // 166 nil, // 167 nil, // 168 nil, // 169 nil, // 170 nil, // 171 nil, // 172 nil, // 173 nil, // 174 nil, // 175 nil, // 176 nil, // 177 nil, // 178 nil, // 179 nil, // 180 nil, // 181 nil, // 182 nil, // 183 nil, // 184 nil, // 185 nil, // 186 nil, // 187 nil, // 188 nil, // 189 nil, // 190 nil, // 191 nil, // 192 nil, // 193 nil, // 194 nil, // 195 nil, // 196 nil, // 197 nil, // 198 nil, // 199 nil, // 200 nil, // 201 nil, // 202 nil, // 203 nil, // 204 nil, // 
205 nil, // 206 nil, // 207 nil, // 208 nil, // 209 nil, // 210 nil, // 211 nil, // 212 nil, // 213 nil, // 214 nil, // 215 nil, // 216 nil, // 217 nil, // 218 nil, // 219 nil, // 220 nil, // 221 nil, // 222 nil, // 223 nil, // 224 nil, // 225 nil, // 226 nil, // 227 nil, // 228 nil, // 229 nil, // 230 nil, // 231 nil, // 232 nil, // 233 nil, // 234 nil, // 235 nil, // 236 nil, // 237 nil, // 238 nil, // 239 nil, // 240 nil, // 241 nil, // 242 nil, // 243 nil, // 244 nil, // 245 nil, // 246 nil, // 247 nil, // 248 nil, // 249 nil, // 250 nil, // 251 nil, // 252 nil, // 253 nil, // 254 { {[]byte{0xFF, 0xD8, 0xFF}, jpg}, {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, {[]byte{0xFF, 0xFB}, mp3}, }, // 255 } const debase64Info = ` debase64 [file/data-URI...] Decode base64-encoded files and/or data-URIs. ` func debase64Main() { args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(debase64Info[1:]) return case `--`: args = args[1:] } } if len(args) > 1 { os.Stderr.WriteString(debase64Info[1:]) os.Exit(1) } name := `-` if len(args) == 1 { name = args[0] } if err := debase64Run(name); err != nil { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") os.Exit(1) } } func debase64Run(s string) error { bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() w := bw if s == `-` { return debase64(w, os.Stdin) } if seemsDataURI(s) { return debase64(w, strings.NewReader(s)) } f, err := os.Open(s) if err != nil { return err } defer f.Close() return debase64(w, f) } // debase64 decodes base64 chunks explicitly, so decoding errors can be told // apart from output-writing ones func debase64(w io.Writer, r io.Reader) error { br := bufio.NewReaderSize(r, bufSize) start, err := br.Peek(64) if err != nil && err != io.EOF { return err } skip, err := skipIntroDataURI(start) if err != nil { return err } if skip > 0 { br.Discard(skip) } dec := base64.NewDecoder(base64.StdEncoding, br) _, err = io.Copy(w, dec) return 
err } func skipIntroDataURI(chunk []byte) (skip int, err error) { if bytes.HasPrefix(chunk, []byte{0xef, 0xbb, 0xbf}) { chunk = chunk[3:] skip += 3 } if !bytes.HasPrefix(chunk, []byte(`data:`)) { return skip, nil } start := chunk if len(start) > 64 { start = start[:64] } i := bytes.Index(start, []byte(`;base64,`)) if i < 0 { return skip, errors.New(`invalid data URI`) } skip += i + len(`;base64,`) return skip, nil } const dedupInfo = ` dedup [options...] [file...] DEDUPlicate lines prevents the same line from appearing again in the output, after the first time. Unique lines are remembered across inputs. Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line feeds by default. All (optional) leading options start with either single or double-dash: -h show this help message -help show this help message ` type stringSet map[string]struct{} func dedupMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(dedupInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } err := dedupRun(os.Stdout, args, liveLines) if err != nil && err != errNoMoreOutput { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") os.Exit(1) } } func dedupRun(w io.Writer, args []string, live bool) error { files := make(stringSet) lines := make(stringSet) bw := bufio.NewWriterSize(w, bufSize) defer bw.Flush() for _, name := range args { if _, ok := files[name]; ok { continue } files[name] = struct{}{} if err := dedupHandleFile(bw, name, lines, live); err != nil { return err } } if len(args) == 0 { return dedup(bw, os.Stdin, lines, live) } return nil } func dedupHandleFile(w *bufio.Writer, name string, got stringSet, live bool) error { if name == `` || name == `-` { return 
dedup(w, os.Stdin, got, live) } f, err := os.Open(name) if err != nil { return errors.New(`can't read from file named "` + name + `"`) } defer f.Close() return dedup(w, f, got, live) } func dedup(w *bufio.Writer, r io.Reader, got stringSet, live bool) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for sc.Scan() { line := sc.Text() if _, ok := got[line]; ok { continue } got[line] = struct{}{} w.Write(sc.Bytes()) if w.WriteByte('\n') != nil { return errNoMoreOutput } if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } const dejsonlInfo = ` dejsonl [filepath...] Turn JSON Lines (JSONL) into proper-JSON arrays. The JSON Lines format is simply plain-text lines, where each line is valid JSON on its own. ` const dejsonlIndent = ` ` func dejsonlMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(dejsonlInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } got := 0 easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return dejsonl(w, r, liveLines, &got) }) if got == 0 { os.Stdout.WriteString("[\n]\n") } else { os.Stdout.WriteString("\n]\n") } } func dejsonl(w *bufio.Writer, r io.Reader, live bool, got *int) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Text() if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { s = s[3:] } // trim spaces at both ends of the current line for len(s) > 0 && s[0] == ' ' { s = s[1:] } for len(s) > 0 && s[len(s)-1] == ' ' { s = s[:len(s)-1] } // ignore empty(ish) lines if len(s) == 0 { continue } // ignore lines starting with unix-style comments if len(s) > 0 && s[0] == '#' { continue } if err := 
checkJSONL(strings.NewReader(s)); err != nil { return err } if *got == 0 { w.WriteByte('[') } else { w.WriteByte(',') } if w.WriteByte('\n') != nil { return errNoMoreOutput } w.WriteString(dejsonlIndent) w.WriteString(s) *got++ if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } func checkJSONL(r io.Reader) error { dec := json.NewDecoder(r) // avoid parsing numbers, so unusually-long numbers are kept verbatim, // even if JSON parsers aren't required to guarantee such input-fidelity // for numbers dec.UseNumber() t, err := dec.Token() if err == io.EOF { return errors.New(`input has no JSON values`) } if err := checkToken(dec, t); err != nil { return err } _, err = dec.Token() if err == io.EOF { // input is over, so it's a success return nil } if err == nil { // a successful `read` is a failure, as it means there are // trailing JSON tokens return errors.New(`unexpected trailing data`) } // any other error, perhaps some invalid-JSON-syntax-type error return err } func checkToken(dec *json.Decoder, t json.Token) error { switch t := t.(type) { case json.Delim: switch t { case json.Delim('['): return checkArray(dec) case json.Delim('{'): return checkObject(dec) default: return errors.New(`unsupported JSON syntax ` + string(t)) } case nil, bool, float64, json.Number, string: return nil default: // return fmt.Errorf(`unsupported token type %T`, t) return errors.New(`invalid JSON token`) } } func checkArray(dec *json.Decoder) error { for { t, err := dec.Token() if err != nil { return err } if t == json.Delim(']') { return nil } if err := checkToken(dec, t); err != nil { return err } } } func checkObject(dec *json.Decoder) error { for { t, err := dec.Token() if err != nil { return err } if t == json.Delim('}') { return nil } if _, ok := t.(string); !ok { return errors.New(`expected a string for a key-value pair`) } t, err = dec.Token() if err == io.EOF || t == json.Delim('}') { return errors.New(`expected a value for a 
key-value pair`) } if err := checkToken(dec, t); err != nil { return err } } } const dessvInfo = ` dessv [filenames...] Turn Space(s)-Separated Values (SSV) into Tab-Separated Values (TSV), where both leading and trailing spaces from input lines are ignored. ` func dessvMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(dessvInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return dessv(w, r, liveLines) }) } func dessv(w *bufio.Writer, r io.Reader, live bool) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) handleRow := dessvHandleRowSSV numTabs := ^0 for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 { if bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) { s = s[3:] } for _, b := range s { if b == '\t' { handleRow = dessvHandleRowTSV break } } numTabs = handleRow(w, s, numTabs) } else { handleRow(w, s, numTabs) } if w.WriteByte('\n') != nil { return errNoMoreOutput } if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } func dessvHandleRowSSV(w *bufio.Writer, s []byte, n int) int { for len(s) > 0 && s[0] == ' ' { s = s[1:] } for len(s) > 0 && s[len(s)-1] == ' ' { s = s[:len(s)-1] } got := 0 for got = 0; len(s) > 0; got++ { if got > 0 { w.WriteByte('\t') } i := bytes.IndexByte(s, ' ') if i < 0 { w.Write(s) s = nil n-- break } w.Write(s[:i]) s = s[i+1:] for len(s) > 0 && s[0] == ' ' { s = s[1:] } n-- } w.Write(s) writeTabs(w, n) return got } func dessvHandleRowTSV(w *bufio.Writer, s []byte, n int) int { got := 0 for _, b := range s { if b == '\t' { got++ } } w.Write(s) writeTabs(w, n-got) return got } const eraseInfo = ` 
erase [options...] [regexes...] Ignore/remove all occurrences of all regex matches along lines read from the standard input. The regular-expression mode used is "re2", which is a superset of the commonly-used "extended-mode". Regexes always avoid matching any ANSI-style sequences, to avoid messing those up. Each regex erases all its occurrences on the current line in the order given among the arguments, so regex-order matters. The options are, available both in single and double-dash versions -h show this help message -help show this help message -i match regexes case-insensitively -ins match regexes case-insensitively ` func eraseMain() { args := os.Args[1:] buffered := false insensitive := false out: for len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(eraseInfo[1:]) return case `-i`, `--i`, `-ins`, `--ins`: insensitive = true args = args[1:] default: break out } } if len(args) > 0 && args[0] == `--` { args = args[1:] } exprs := make([]*regexp.Regexp, 0, len(args)) for _, s := range args { var err error var exp *regexp.Regexp if insensitive { exp, err = regexp.Compile(`(?i)` + s) } else { exp, err = regexp.Compile(s) } if err != nil { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") continue } exprs = append(exprs, exp) } // quit right away when given invalid regexes if len(exprs) < len(args) { os.Exit(1) } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } sc := bufio.NewScanner(os.Stdin) sc.Buffer(nil, maxLineBufSize) bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() var src []byte var dst []byte for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && hasPrefixBOM(s) { s = s[3:] } src = append(src[:0], s...) for _, e := range exprs { dst = erase(dst[:0], src, e) src = append(src[:0], dst...) 
} bw.Write(dst) if err := bw.WriteByte('\n'); err != nil { return } if !liveLines { continue } if err := bw.Flush(); err != nil { return } } handleError(bw, sc.Err()) } func erase(dst []byte, src []byte, with *regexp.Regexp) []byte { for len(src) > 0 { i, j := indexEscapeSequence(src) if i < 0 { dst = eraseHandleChunk(dst, src, with) break } if j < 0 { j = len(src) } dst = eraseHandleChunk(dst, src[:i], with) dst = append(dst, src[i:j]...) src = src[j:] } return dst } func eraseHandleChunk(dst []byte, src []byte, with *regexp.Regexp) []byte { for len(src) > 0 { span := with.FindIndex(src) if span == nil { return append(dst, src...) } start := span[0] end := span[1] dst = append(dst, src[:start]...) // avoid infinite loops caused by empty regex matches if start == end && end < len(src) { dst = append(dst, src[end]) end++ } src = src[end:] } return dst } const fixlinesInfo = ` fixlines [options...] [filepaths...] This tool fixes lines in UTF-8 text, ignoring leading UTF-8 BOMs, trailing carriage-returns on all lines, and ensures no lines across inputs are accidentally joined, since all lines it outputs end with line-feeds, even when the original files don't. The only option available is to show this help message, using any of "-h", "--h", "-help", or "--help", without the quotes. 
` func fixlinesMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(fixlinesInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return fixLines(w, r, liveLines) }) } func fixLines(w *bufio.Writer, r io.Reader, live bool) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Bytes() // ignore leading UTF-8 BOM on the first line if i == 0 && hasPrefixBOM(s) { s = s[3:] } // trim trailing spaces on the current line for len(s) > 0 && s[len(s)-1] == ' ' { s = s[:len(s)-1] } w.Write(s) if w.WriteByte('\n') != nil { return errNoMoreOutput } if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } const himaInfo = ` hima [options...] [regexes...] HIlight MAtches ANSI-styles matching regular expressions along lines read from the standard input. The regular-expression mode used is "re2", which is a superset of the commonly-used "extended-mode". Regexes always avoid matching any ANSI-style sequences, to avoid messing those up. Also, multiple matches in a line never overlap: at each step along a line, the earliest-starting match among the regexes always wins, as the order regexes are given among the arguments never matters. 
The options are, available both in single and double-dash versions -h show this help message -help show this help message -i match regexes case-insensitively -ins match regexes case-insensitively ` const highlightStyle = "\x1b[7m" func himaMain() { buffered := false insensitive := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(himaInfo[1:]) return } } out: for len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-i`, `--i`, `-ins`, `--ins`: insensitive = true args = args[1:] default: break out } } if len(args) > 0 && args[0] == `--` { args = args[1:] } exprs := make([]*regexp.Regexp, 0, len(args)) for _, s := range args { var err error var exp *regexp.Regexp if insensitive { exp, err = regexp.Compile(`(?i)` + s) } else { exp, err = regexp.Compile(s) } if err != nil { os.Stderr.WriteString(err.Error()) os.Stderr.WriteString("\n") continue } exprs = append(exprs, exp) } // quit right away when given invalid regexes if len(exprs) < len(args) { os.Exit(1) } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } sc := bufio.NewScanner(os.Stdin) sc.Buffer(nil, maxLineBufSize) bw := bufio.NewWriterSize(os.Stdout, bufSize) defer bw.Flush() for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && hasPrefixBOM(s) { s = s[3:] } for len(s) > 0 { i, j := indexEscapeSequence(s) if i < 0 { himaHandleChunk(bw, s, exprs) break } if j < 0 { j = len(s) } himaHandleChunk(bw, s[:i], exprs) bw.Write(s[i:j]) s = s[j:] } if err := bw.WriteByte('\n'); err != nil { return } if !liveLines { continue } if err := bw.Flush(); err != nil { return } } handleError(bw, sc.Err()) } // note: looking at the results of restoring ANSI-styles after style-resets // doesn't seem to be worth it, as a previous version used to do // himaHandleChunk handles line-slices around any detected ANSI-style sequences, 
// or even whole lines, when no ANSI-styles are found in them
func himaHandleChunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) {
	start := -1
	end := -1
	for len(s) > 0 {
		// find the earliest (leftmost) match among all the regexes given
		start = -1
		for _, e := range with {
			span := e.FindIndex(s)
			// also ignore empty regex matches to avoid infinite outer loops,
			// as skipping empty slices isn't advancing at all, leaving the
			// string stuck to being empty-matched forever by the same regex
			if span == nil || span[0] == span[1] {
				continue
			}
			if span[0] < start || start < 0 {
				start = span[0]
				end = span[1]
			}
		}
		// no more matches: emit the rest of the chunk as-is
		if start < 0 {
			w.Write(s)
			return
		}
		// emit the match wrapped in highlight-on/style-reset sequences
		w.Write(s[:start])
		w.WriteString(highlightStyle)
		w.Write(s[start:end])
		w.WriteString("\x1b[0m")
		s = s[end:]
	}
}

const json0Info = ` json0 [options...] [file...] JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output. Its output is always a single line, which ends with a line-feed. Besides minimizing bytes, this tool also adapts almost-JSON input into valid JSON, since it - ignores both rest-of-line and multi-line comments - ignores extra/trailing commas in arrays and objects - turns single-quoted strings/keys into double-quoted strings - double-quotes unquoted object keys - changes \x 2-hex-digit into \u 4-hex-digit string-escapes All options available can either start with a single or a double-dash -h show this help message -help show this help message -jsonl emit JSON Lines, when top-level value is an array `

// json0ChunkPeekSize is how many bytes the quick all-digits/all-ASCII
// fast-path funcs peek at a time
const json0ChunkPeekSize = 16

// json0Main implements the `json0` tool: see json0Info for details
func json0Main() {
	args := os.Args[1:]
	buffered := false
	handler := json0
	// gather leading options; the first non-option argument stops the scan
out:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
			continue
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(json0Info[1:])
			return
		case `-jsonl`, `--jsonl`:
			handler = jsonl0
			args = args[1:]
			continue
		default:
			break out
		}
	}
	// an optional `--` separates options from arguments
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}
	if len(args) > 1 {
		const msg = "multiple inputs aren't allowed\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
	}
	// emit lines as soon as they're ready, unless asked to buffer output,
	// or unless stdout is seekable (a regular file)
	liveLines :=
!buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}
	// figure out whether input should come from a named file or from stdin
	var paths [1]string
	paths[0] = `-`
	if len(args) > 0 {
		paths[0] = args[0]
	}
	easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error {
		br := bufio.NewReaderSize(r, bufSize)
		defer w.Flush()
		return handler(w, br, liveLines)
	})
}

// handlerFunc is the signature shared by funcs json0 and jsonl0
type handlerFunc func(w *bufio.Writer, r *bufio.Reader, live bool) error

// all the fixed error messages the JSON-0 converter can emit; these get
// wrapped with input-position info via type linePosError when they happen
var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`expected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)
)

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int
	// pos is the 1-based `horizontal` position in its line
	pos int
	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// isIdentifier improves control-flow of func json0HandleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6':
true, '7': true, '8': true, '9': true,
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON; final boolean
// value isn't used, and is just there to match the signature of func jsonl0
func json0(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}
	defer w.Flush()
	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	// handle a single top-level JSON value
	err := json0HandleValue(w, &jr)
	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte
	outputByte(w, '\n')
	if err != nil {
		return err
	}
	return jr.handleTrailingJunk()
}

// jsonl0 converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func
// avoids writing a trailing line-feed, leaving that up to its caller
func jsonl0(w *bufio.Writer, r *bufio.Reader, live bool) error {
	jr := jsonReader{r, 1, 1}
	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	// top-level arrays get the one-output-line-per-item treatment
	chunk, err := jr.r.Peek(1)
	if err == nil &&
len(chunk) >= 1 {
		switch b := chunk[0]; b {
		case '[', '(':
			return json0HandleArrayJSONL(w, &jr, b, live)
		}
	}
	// handle a single top-level JSON value
	err = json0HandleValue(w, &jr)
	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte
	outputByte(w, '\n')
	if err != nil {
		return err
	}
	return jr.handleTrailingJunk()
}

// json0HandleArrayJSONL handles top-level arrays for func jsonl0
func json0HandleArrayJSONL(w *bufio.Writer, jr *jsonReader, start byte, live bool) error {
	if err := jr.demandSyntax(start); err != nil {
		return err
	}
	// `(`-started arrays must end with a matching `)`
	var end byte = ']'
	if start == '(' {
		end = ')'
	}
	for n := 0; true; n++ {
		// there may be whitespace/comments before the next comma
		if err := jr.seekNext(); err != nil {
			return err
		}
		// handle commas between values, as well as trailing ones
		comma := false
		b, _ := jr.peekByte()
		if b == ',' {
			jr.readByte()
			comma = true
			// there may be whitespace/comments before an ending ']'
			if err := jr.seekNext(); err != nil {
				return err
			}
			b, _ = jr.peekByte()
		}
		// handle end of array
		if b == end {
			jr.readByte()
			if n > 0 {
				err := outputByte(w, '\n')
				if live {
					w.Flush()
				}
				return err
			}
			return nil
		}
		// turn commas between adjacent values into line-feeds, as the
		// output for this custom func is supposed to be JSON Lines
		if n > 0 {
			if !comma {
				return errNoArrayComma
			}
			if err := outputByte(w, '\n'); err != nil {
				return err
			}
			if live {
				w.Flush()
			}
		}
		// handle the next value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := json0HandleValue(w, jr); err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader
	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int
	// pos is the 1-based `horizontal` position in its line, and gives
	// errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair
func (jr jsonReader) improveError(err error) error {
	// don't double-decorate errors which already carry position info
	if _, ok := err.(linePosError); ok {
		return err
	}
	if err == io.EOF {
		return linePosError{jr.line, jr.pos, errInputEarlyEnd}
	}
	if err != nil {
		return linePosError{jr.line, jr.pos, err}
	}
	return nil
}

// handleLeadingJunk skips everything ignorable before the first JSON token
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()
	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk ensures nothing meaningful follows the top-level value
func (jr *jsonReader) handleTrailingJunk() error {
	// ignore trailing whitespace and/or comments
	if err := jr.seekNext(); err != nil {
		return err
	}
	// ignore trailing semicolons
	for {
		if b, ok := jr.peekByte(); !ok || b != ';' {
			break
		}
		jr.readByte()
		// ignore trailing whitespace and/or comments
		if err := jr.seekNext(); err != nil {
			return err
		}
	}
	// beyond trailing whitespace and/or comments, any more bytes
	// make the whole input data invalid JSON
	if _, ok := jr.peekByte(); ok {
		return jr.improveError(errExtraBytes)
	}
	return nil
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err == io.EOF {
		return jr.improveError(errInputEarlyEnd)
	}
	if err != nil {
		return jr.improveError(err)
	}
	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}
	jr.readByte()
	return nil
}

// updatePosInfo does what it says, given the byte just read separately
func (jr *jsonReader) updatePosInfo(r rune) {
	if r == '\n' {
		jr.line += 1
jr.pos = 1 } else { jr.pos++ } } // peekByte simplifies control-flow for various other funcs func (jr jsonReader) peekByte() (b byte, ok bool) { chunk, err := jr.r.Peek(1) if err == nil && len(chunk) >= 1 { return chunk[0], true } return 0, false } // readByte does what it says, updating the reader's position info func (jr *jsonReader) readByte() (b byte, err error) { b, err = jr.r.ReadByte() if err == nil { jr.updatePosInfo(rune(b)) return b, nil } return b, jr.improveError(err) } // readRune does what it says, updating the reader's position info func (jr *jsonReader) readRune() (r rune, err error) { r, _, err = jr.r.ReadRune() if err == nil { jr.updatePosInfo(r) return r, nil } return r, jr.improveError(err) } // seekNext skips/seeks the next token, ignoring runs of whitespace symbols // and comments, either single-line (starting with //) or general (starting // with /* and ending with */) func (jr *jsonReader) seekNext() error { for { b, ok := jr.peekByte() if !ok { return nil } // case ' ', '\t', '\f', '\v', '\r', '\n': if b <= 32 { // keep skipping whitespace bytes jr.readByte() continue } if b == '#' { if err := jr.skipLine(); err != nil { return err } continue } if b != '/' { // reached the next token return nil } if err := jr.skipComment(); err != nil { return err } // after comments, keep looking for more whitespace and/or comments } } // skipComment helps func seekNext skip over comments, simplifying the latter // func's control-flow func (jr *jsonReader) skipComment() error { err := jr.demandSyntax('/') if err != nil { return err } b, ok := jr.peekByte() if !ok { return nil } switch b { case '/': // handle single-line comments return jr.skipLine() case '*': // handle (potentially) multi-line comments return jr.skipGeneralComment() default: return jr.improveError(errInvalidComment) } } // skipLine handles single-line comments for func skipComment func (jr *jsonReader) skipLine() error { for { b, err := jr.readByte() if err == io.EOF { // end of input is 
fine in this case return nil } if err != nil { return err } if b == '\n' { return nil } } } // skipGeneralComment handles (potentially) multi-line comments for func // skipComment func (jr *jsonReader) skipGeneralComment() error { var prev byte for { b, err := jr.readByte() if err != nil { return jr.improveError(errCommentEarlyEnd) } if prev == '*' && b == '/' { return nil } if b == '\n' { jr.line++ } prev = b } } // skipUTF8BOM does what it says, if a UTF-8 BOM is present func (jr *jsonReader) skipUTF8BOM() { lead, err := jr.r.Peek(3) if err != nil { return } if len(lead) > 2 && lead[0] == 0xef && lead[1] == 0xbb && lead[2] == 0xbf { jr.readByte() jr.readByte() jr.readByte() jr.pos += 3 } } // outputByte is a small wrapper on func WriteByte, which adapts any error // into a custom dummy output-error, which is in turn meant to be ignored, // being just an excuse to quit the app immediately and successfully func outputByte(w *bufio.Writer, b byte) error { err := w.WriteByte(b) if err == nil { return nil } return errNoMoreOutput } func json0HandleArray(w *bufio.Writer, jr *jsonReader, start byte) error { if err := jr.demandSyntax(start); err != nil { return err } var end byte = ']' if start == '(' { end = ')' } w.WriteByte('[') for n := 0; true; n++ { // there may be whitespace/comments before the next comma if err := jr.seekNext(); err != nil { return err } // handle commas between values, as well as trailing ones comma := false b, _ := jr.peekByte() if b == ',' { jr.readByte() comma = true // there may be whitespace/comments before an ending ']' if err := jr.seekNext(); err != nil { return err } b, _ = jr.peekByte() } // handle end of array if b == end { jr.readByte() w.WriteByte(']') return nil } // don't forget commas between adjacent values if n > 0 { if !comma { return errNoArrayComma } if err := outputByte(w, ','); err != nil { return err } } // handle the next value if err := jr.seekNext(); err != nil { return err } if err := json0HandleValue(w, jr); err != 
nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// json0HandleDigits helps various number-handling funcs do their job
func json0HandleDigits(w *bufio.Writer, jr *jsonReader) error {
	// try the quicker all-digits fast-path first
	if json0TrySimpleDigits(w, jr) {
		return nil
	}
	for n := 0; true; n++ {
		b, _ := jr.peekByte()
		// support `nice` long numbers by ignoring their underscores
		if b == '_' {
			jr.readByte()
			continue
		}
		if '0' <= b && b <= '9' {
			jr.readByte()
			w.WriteByte(b)
			continue
		}
		// at least one digit is demanded
		if n == 0 {
			return errNoDigits
		}
		return nil
	}
	// make the compiler happy
	return nil
}

// json0TrySimpleDigits tries to handle (more quickly) digit-runs where all
// bytes are just digits: this is a very common case for numbers; returns
// whether it succeeded, so this func's caller knows if it needs to do
// anything, the slower way
func json0TrySimpleDigits(w *bufio.Writer, jr *jsonReader) (gotIt bool) {
	chunk, _ := jr.r.Peek(json0ChunkPeekSize)
	for i, b := range chunk {
		if '0' <= b && b <= '9' {
			continue
		}
		if i == 0 || b == '_' {
			return false
		}
		// bulk-writing the chunk is this func's whole point
		w.Write(chunk[:i])
		jr.r.Discard(i)
		jr.pos += i
		return true
	}
	// maybe the digits-run is ok, but it's just longer than the chunk
	return false
}

// json0HandleDot handles pseudo-JSON numbers which start with a decimal dot
func json0HandleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}
	// valid JSON demands a leading 0 before a decimal dot
	w.Write([]byte{'0', '.'})
	return json0HandleDigits(w, jr)
}

// json0HandleKey is used by func json0HandleObject and generalizes func
// json0HandleString, by allowing unquoted object keys; it's not used anywhere
// else, as allowing unquoted string values is ambiguous with the actual
// JSON-keyword values null, false, and true.
func json0HandleKey(w *bufio.Writer, jr *jsonReader) error { quote, ok := jr.peekByte() if !ok { return jr.improveError(errStringEarlyEnd) } if quote == '"' || quote == '\'' { return json0HandleString(w, jr, rune(quote)) } w.WriteByte('"') for { if b, _ := jr.peekByte(); isIdentifier[b] { jr.readByte() w.WriteByte(b) continue } w.WriteByte('"') return nil } } // json0TrySimpleString tries to handle (more quickly) inner-strings where all // bytes are unescaped ASCII symbols: this is a very common case for strings, // and is almost always the case for object keys; returns whether it succeeded, // so this func's caller knows knows if it needs to do anything, the slower way func json0TrySimpleString(w *bufio.Writer, jr *jsonReader, quote rune) (gotIt bool) { end := -1 chunk, _ := jr.r.Peek(json0ChunkPeekSize) for i, b := range chunk { if 32 <= b && b <= 127 && b != '\\' && b != '\'' && b != '"' { continue } if b == byte(quote) { end = i break } return false } if end < 0 { return false } // bulk-writing the chunk is this func's whole point w.WriteByte('"') w.Write(chunk) w.WriteByte('"') jr.r.Discard(end + 1) jr.pos += end + 1 return true } // json0HandleKeyword is used by funcs json0HandleFalse, json0HandleNull, and // json0HandleTrue func json0HandleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error { for rest := kw; len(rest) > 0; rest = rest[1:] { b, err := jr.readByte() if err == nil && b == rest[0] { // keywords given to this func have no line-feeds jr.pos++ continue } msg := `expected JSON value ` + string(kw) return jr.improveError(errors.New(msg)) } w.Write(kw) return nil } // json0HandleNegative handles numbers starting with a negative sign for func // json0HandleValue func json0HandleNegative(w *bufio.Writer, jr *jsonReader) error { if err := jr.demandSyntax('-'); err != nil { return err } w.WriteByte('-') if b, _ := jr.peekByte(); b == '.' 
{ jr.readByte() w.Write([]byte{'0', '.'}) return json0HandleDigits(w, jr) } return json0HandleNumber(w, jr) } // json0HandleNumber handles numeric values/tokens, including invalid-JSON // cases, such as values starting with a decimal dot func json0HandleNumber(w *bufio.Writer, jr *jsonReader) error { // handle integer digits if err := json0HandleDigits(w, jr); err != nil { return err } // handle optional decimal digits, starting with a leading dot if b, _ := jr.peekByte(); b == '.' { jr.readByte() w.WriteByte('.') return json0HandleDigits(w, jr) } // handle optional exponent digits if b, _ := jr.peekByte(); b == 'e' || b == 'E' { jr.readByte() w.WriteByte(b) b, _ = jr.peekByte() if b == '+' { jr.readByte() } else if b == '-' { w.WriteByte('-') jr.readByte() } return json0HandleDigits(w, jr) } return nil } func json0HandleObject(w *bufio.Writer, jr *jsonReader) error { if err := jr.demandSyntax('{'); err != nil { return err } w.WriteByte('{') for npairs := 0; true; npairs++ { // there may be whitespace/comments before the next comma if err := jr.seekNext(); err != nil { return err } // handle commas between key-value pairs, as well as trailing ones comma := false b, _ := jr.peekByte() if b == ',' { jr.readByte() comma = true // there may be whitespace/comments before an ending '}' if err := jr.seekNext(); err != nil { return err } b, _ = jr.peekByte() } // handle end of object if b == '}' { jr.readByte() w.WriteByte('}') return nil } // don't forget commas between adjacent key-value pairs if npairs > 0 { if !comma { return errNoObjectComma } if err := outputByte(w, ','); err != nil { return err } } // handle the next pair's key if err := jr.seekNext(); err != nil { return err } if err := json0HandleKey(w, jr); err != nil { return err } // demand a colon right after the key if err := jr.seekNext(); err != nil { return err } if err := jr.demandSyntax(':'); err != nil { return err } w.WriteByte(':') // handle the next pair's value if err := jr.seekNext(); err != nil { 
return err
		}
		if err := json0HandleValue(w, jr); err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// json0HandlePositive handles numbers starting with a positive sign for func
// json0HandleValue
func json0HandlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}
	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		w.Write([]byte{'0', '.'})
		return json0HandleDigits(w, jr)
	}
	return json0HandleNumber(w, jr)
}

// json0HandleString handles strings for func json0HandleValue, and supports
// both single-quotes and double-quotes, always emitting the latter in the
// output, of course
func json0HandleString(w *bufio.Writer, jr *jsonReader, quote rune) error {
	if quote != '"' && quote != '\'' {
		return errNoStringQuote
	}
	// skip the opening quote, which was only peeked at by the caller
	jr.readByte()
	// try the quicker no-escapes ASCII handler
	if json0TrySimpleString(w, jr, quote) {
		return nil
	}
	// it's a non-trivial inner-string, so handle it byte-by-byte
	w.WriteByte('"')
	escaped := false
	for {
		r, err := jr.readRune()
		// bufio's ReadRune signals invalid UTF-8 with the replacement char
		if r == unicode.ReplacementChar {
			return jr.improveError(errInvalidRune)
		}
		if err != nil {
			if err == io.EOF {
				return jr.improveError(errStringEarlyEnd)
			}
			return jr.improveError(err)
		}
		if !escaped {
			if r == '\\' {
				escaped = true
				continue
			}
			// handle end of string
			if r == quote {
				return outputByte(w, '"')
			}
			if r <= 127 {
				w.Write(escapedStringBytes[byte(r)])
			} else {
				w.WriteRune(r)
			}
			continue
		}
		// handle escaped items
		escaped = false
		switch r {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := json0CopyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}
		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := json0CopyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}
		case 't', 'f', 'r', 'n',
'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(byte(r))
		case '\'':
			// escaped single-quotes aren't standard JSON, but they can
			// be handy when the input uses non-standard single-quoted
			// strings
			w.WriteByte('\'')
		default:
			if r <= 127 {
				w.Write(escapedStringBytes[byte(r)])
			} else {
				w.WriteRune(r)
			}
		}
	}
}

// json0CopyHex handles a run of hex-digits for func json0HandleString, starting
// right after the leading `\u` (or `\x`) part; this func doesn't `improve`
// its errors with position info: that's up to the caller
func json0CopyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		b, err := jr.readByte()
		if err == io.EOF {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}
		if b >= 128 {
			return errInvalidHex
		}
		// a 0 in the lookup table marks a non-hex-digit byte
		if b := matchHex[b]; b != 0 {
			w.WriteByte(b)
			continue
		}
		return errInvalidHex
	}
	return nil
}

// json0HandleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func json0HandleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return json0HandleValueDispatch(w, jr, chunk[0])
	}
	if err == io.EOF {
		return jr.improveError(errInputEarlyEnd)
	}
	// keep the actual error, instead of mislabeling any non-EOF failure
	// as an early end of input
	return jr.improveError(err)
}

// json0HandleValueDispatch simplifies control-flow for func json0HandleValue
func json0HandleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case 'f':
		return json0HandleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'})
	case 'n':
		return json0HandleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'})
	case 't':
		return json0HandleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'})
	// NOTE(review): the Python-style keywords below are matched, but also
	// emitted verbatim, which makes the output invalid JSON — confirm
	// whether they were meant to become false/null/true instead
	case 'F':
		return json0HandleKeyword(w, jr, []byte{'F', 'a', 'l', 's', 'e'})
	case 'N':
		return json0HandleKeyword(w, jr, []byte{'N', 'o', 'n', 'e'})
	case 'T':
		return json0HandleKeyword(w, jr, []byte{'T', 'r', 'u', 'e'})
	case '.':
		return json0HandleDot(w, jr)
	case '+':
		return json0HandlePositive(w, jr)
	case
'-':
		return json0HandleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return json0HandleNumber(w, jr)
	case '\'', '"':
		return json0HandleString(w, jr, rune(b))
	case '[', '(':
		return json0HandleArray(w, jr, b)
	case '{':
		return json0HandleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func json0HandleString treat string bytes quickly
// and correctly, using their officially-supported JSON escape sequences;
// bytes needing no escape map to themselves as 1-byte slices
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'}, {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106},
{107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}

const json2Info = ` json2 [filepath...] JSON-2 indents valid JSON input into multi-line JSON which uses 2 spaces for each indentation level. `

// json2Main implements the `json2` tool: see json2Info for details
func json2Main() {
	args := os.Args[1:]
	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			// show this tool's own help message: the original showed
			// himaInfo, the help message of the unrelated `hima` tool
			os.Stdout.WriteString(json2Info[1:])
			return
		}
	}
	// an optional `--` separates options from arguments
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}
	if len(args) > 1 {
		handleError(nil, errors.New(`multiple inputs not allowed`))
		return
	}
	// figure out whether input should come from a named file or from stdin
	var paths [1]string
	paths[0] = `-`
	if len(args) > 0 {
		paths[0] = args[0]
	}
	easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error {
		return json2(w, r)
	})
}

// json2 re-indents a single top-level JSON value from r into w, using
// 2 spaces per indentation level
func json2(w *bufio.Writer, r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()
	t, err := dec.Token()
	if err == io.EOF {
		return errors.New(`input has no JSON values`)
	}
	// report other first-token errors right away, instead of silently
	// treating the nil token as a JSON null
	if err != nil {
		return err
	}
	if err = json2HandleToken(w, dec, t, 0, 0); err != nil {
		return err
	}
	// don't forget ending the last line for the last value
	w.WriteByte('\n')
	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}
	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}
	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// json2HandleToken handles recursion for func json2
func json2HandleToken(w *bufio.Writer, dec *json.Decoder, t json.Token, pre, level int) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return json2HandleArray(w, dec, pre, level)
		case json.Delim('{'):
			return json2HandleObject(w, dec, pre, level)
		default:
			return errors.New(`unsupported JSON syntax ` + string(t))
		}
	case nil:
		writeSpaces(w, 2*pre)
		w.WriteString(`null`)
		return nil
	case bool:
		writeSpaces(w, 2*pre)
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil
	case json.Number:
		writeSpaces(w, 2*pre)
		w.WriteString(t.String())
		return nil
case string:
		return json2HandleString(w, t, pre)
	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// json2HandleArray re-indents an array, whose opening delimiter was already
// read by the caller
func json2HandleArray(w *bufio.Writer, dec *json.Decoder, pre, level int) error {
	for i := 0; true; i++ {
		t, err := dec.Token()
		if err != nil {
			return err
		}
		if t == json.Delim(']') {
			if i == 0 {
				// empty arrays collapse to `[]` on a single line
				writeSpaces(w, 2*pre)
				w.WriteByte('[')
				w.WriteByte(']')
			} else {
				w.WriteByte('\n')
				writeSpaces(w, 2*level)
				w.WriteByte(']')
			}
			return nil
		}
		if i == 0 {
			writeSpaces(w, 2*pre)
			w.WriteByte('[')
			w.WriteByte('\n')
		} else {
			w.WriteByte(',')
			w.WriteByte('\n')
			if err := w.Flush(); err != nil {
				// a write error may be the consequence of stdout being closed,
				// perhaps by another app along a pipe
				return errNoMoreOutput
			}
		}
		err = json2HandleToken(w, dec, t, level+1, level+1)
		if err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// json2HandleObject re-indents an object, whose opening delimiter was already
// read by the caller
func json2HandleObject(w *bufio.Writer, dec *json.Decoder, pre, level int) error {
	for i := 0; true; i++ {
		t, err := dec.Token()
		if err != nil {
			return err
		}
		if t == json.Delim('}') {
			if i == 0 {
				// empty objects collapse to `{}` on a single line
				writeSpaces(w, 2*pre)
				w.WriteByte('{')
				w.WriteByte('}')
			} else {
				w.WriteByte('\n')
				writeSpaces(w, 2*level)
				w.WriteByte('}')
			}
			return nil
		}
		if i == 0 {
			writeSpaces(w, 2*pre)
			w.WriteByte('{')
			w.WriteByte('\n')
		} else {
			w.WriteByte(',')
			w.WriteByte('\n')
			if err := w.Flush(); err != nil {
				// a write error may be the consequence of stdout being closed,
				// perhaps by another app along a pipe
				return errNoMoreOutput
			}
		}
		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}
		err = json2HandleString(w, k, level+1)
		if err != nil {
			return err
		}
		w.WriteString(": ")
		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}
		// NOTE(review): a non-EOF error from dec.Token is ignored here,
		// which treats a nil token as a JSON null — confirm if intended
		err = json2HandleToken(w, dec, t, 0, level+1)
		if err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// json2HandleString emits an indented, double-quoted, escaped JSON string
func json2HandleString(w *bufio.Writer, s string, level int) error {
	writeSpaces(w, 2*level)
w.WriteByte('"') for i := range s { w.Write(escapedStringBytes[s[i]]) } w.WriteByte('"') return nil } const jsonlInfo = ` jsonl [options...] [filepath...] JSON Lines turns valid JSON-input arrays into separate JSON lines, one for each top-level item. Non-arrays result in a single JSON-line. When not given a filepath to load, standard input is used instead. Every output line is always a single top-level item from the input. ` func jsonlMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(plainInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return jsonl(w, r, liveLines) }) } func jsonl(w *bufio.Writer, r io.Reader, live bool) error { dec := json.NewDecoder(r) // avoid parsing numbers, so unusually-long numbers are kept verbatim, // even if JSON parsers aren't required to guarantee such input-fidelity // for numbers dec.UseNumber() t, err := dec.Token() if err == io.EOF { // return errors.New(`input has no JSON values`) return nil } if t == json.Delim('[') { if err := jsonlHandleTopLevelArray(w, dec, live); err != nil { return err } } else { if err := jsonlHandleToken(w, dec, t); err != nil { return err } w.WriteByte('\n') if live { w.Flush() } } _, err = dec.Token() if err == io.EOF { // input is over, so it's a success return nil } if err == nil { // a successful `read` is a failure, as it means there are // trailing JSON tokens return errors.New(`unexpected trailing data`) } // any other error, perhaps some invalid-JSON-syntax-type error return err } func jsonlHandleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error { switch t := t.(type) { case json.Delim: switch t { case 
json.Delim('['):
			return jsonlHandleArray(w, dec)
		case json.Delim('{'):
			return jsonlHandleObject(w, dec)
		default:
			// `]` or `}` here means unbalanced input
			return errors.New(`unsupported JSON syntax ` + string(t))
		}
	case nil:
		w.WriteString(`null`)
		return nil
	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil
	case json.Number:
		// numbers are kept verbatim, thanks to dec.UseNumber()
		w.WriteString(t.String())
		return nil
	case string:
		return jsonlHandleString(w, t)
	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// jsonlHandleTopLevelArray emits each item of the outermost array as its own
// output line; when live is true, output is flushed after every line, and a
// failed flush quits quietly, as it usually means stdout was closed
func jsonlHandleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error {
	for i := 0; true; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if t == json.Delim(']') {
			return nil
		}
		err = jsonlHandleToken(w, dec, t)
		if err != nil {
			return err
		}
		w.WriteByte('\n')
		if !live {
			continue
		}
		if err := w.Flush(); err != nil {
			// a write error may be the consequence of stdout being closed,
			// perhaps by another app along a pipe
			return errNoMoreOutput
		}
	}
	// make the compiler happy
	return nil
}

// jsonlHandleArray renders a nested array on a single line, with `, ` between
// items
func jsonlHandleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')
	for i := 0; true; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte(']')
			return nil
		}
		if err != nil {
			return err
		}
		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}
		if i > 0 {
			_, err := w.WriteString(", ")
			if err != nil {
				return errNoMoreOutput
			}
		}
		err = jsonlHandleToken(w, dec, t)
		if err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// jsonlHandleObject renders a nested object on a single line, with `, `
// between key-value pairs
func jsonlHandleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')
	for i := 0; true; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte('}')
			return nil
		}
		if err != nil {
			return err
		}
		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}
		if i > 0 {
			_, err := w.WriteString(", ")
			if err != nil {
				return errNoMoreOutput
			}
		}
		// the stdlib decoder should already reject non-string keys, but make
		// sure anyway
		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}
		err = jsonlHandleString(w, k)
		if err != nil {
			return err
		}
w.WriteString(": ") t, err = dec.Token() if err == io.EOF { return errors.New(`expected a value for a key-value pair`) } err = jsonlHandleToken(w, dec, t) if err != nil { return err } } // make the compiler happy return nil } func jsonlHandleString(w *bufio.Writer, s string) error { w.WriteByte('"') for i := range s { w.Write(escapedStringBytes[s[i]]) } w.WriteByte('"') return nil } const jsonsInfo = ` jsons [options...] [filenames...] JSON Strings turns TSV (tab-separated values) data into a JSON array of objects whose values are strings or nulls, the latter being used for missing trailing values. ` type jsonsRunConfig struct { lines int keys []string live bool } func jsonsMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(himaInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } var cfg jsonsRunConfig cfg.live = !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { cfg.live = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return jsons(w, r, &cfg) }) if cfg.lines < 2 { os.Stdout.WriteString("[]\n") } else { os.Stdout.WriteString("\n]\n") } } func jsons(w *bufio.Writer, r io.Reader, cfg *jsonsRunConfig) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Text() if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { s = s[3:] } if cfg.lines == 0 { cfg.keys = jsonsEscapeKeys(s) cfg.lines++ continue } if cfg.lines == 1 { w.WriteString("[\n ") } else { if _, err := w.WriteString(",\n "); err != nil { return errNoMoreOutput } } jsonsEmitRow(w, s, cfg.keys) cfg.lines++ if !cfg.live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } func jsonsEmitRow(w *bufio.Writer, line string, keys []string) { j := 0 w.WriteByte('{') 
loopTSV(line, func(i int, s string) {
		j = i
		if i > 0 {
			w.WriteString(", ")
		}
		w.WriteString(keys[i])
		w.WriteString(": \"")
		// escape only backslashes and double-quotes; all other runes are
		// emitted as-is
		for _, r := range s {
			if r == '\\' || r == '"' {
				w.WriteByte('\\')
			}
			w.WriteRune(r)
		}
		w.WriteByte('"')
	})
	// null-pad missing trailing values, so every row shows all keys
	for i := j + 1; i < len(keys); i++ {
		if i > 0 {
			w.WriteString(", ")
		}
		w.WriteString(keys[i])
		w.WriteString(": null")
	}
	w.WriteByte('}')
}

// jsonsEscapeKeys pre-renders each header item as a quoted/escaped JSON
// string, so data rows can reuse the results verbatim
func jsonsEscapeKeys(line string) []string {
	var keys []string
	var sb strings.Builder
	loopTSV(line, func(i int, s string) {
		sb.WriteByte('"')
		for _, r := range s {
			if r == '\\' || r == '"' {
				sb.WriteByte('\\')
			}
			sb.WriteRune(r)
		}
		sb.WriteByte('"')
		keys = append(keys, sb.String())
		sb.Reset()
	})
	return keys
}

const matchInfo = ` match [options...] [regular expressions...] Only keep lines which match any of the extended-mode regular expressions given. When not given any regex, match non-empty lines by default. The options are, available both in single and double-dash versions -h, -help show this help message -i, -ins match regexes case-insensitively -l, -links add a regex to match HTTP/HTTPS links case-insensitively `

// matchMain is the entry point for the `match` tool: it keeps only the stdin
// lines matching any of the regexes given
// NOTE(review): matchInfo documents a -l/-links option, but no such option is
// handled here or anywhere visible in this chunk — confirm
func matchMain() {
	nerr := 0
	buffered := false
	sensitive := true
	args := os.Args[1:]
out:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
		case `-i`, `--i`, `-ins`, `--ins`:
			sensitive = false
			args = args[1:]
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(matchInfo[1:])
			return
		default:
			break out
		}
	}
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}
	// emit/flush lines as soon as they're ready, except when buffering was
	// asked for, or when stdout is seekable (i.e. redirected to a file)
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}
	if len(args) == 0 {
		// no regexes given: match any non-empty line
		args = []string{`.`}
	}
	exprs := make([]*regexp.Regexp, 0, len(args))
	for _, src := range args {
		var err error
		var exp *regexp.Regexp
		if !sensitive {
			exp, err = regexp.Compile(`(?i)` + src)
		} else {
			exp, err = regexp.Compile(src)
		}
		if err != nil {
			// report every bad regex before quitting
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			nerr++
		}
		// a nil exp may be appended here, but nerr makes the app quit
		// before any nil is ever used
		exprs = append(exprs, exp)
	}
	if nerr > 0 {
		os.Exit(1)
	}
	sc := bufio.NewScanner(os.Stdin)
	sc.Buffer(nil, maxLineBufSize)
	bw := bufio.NewWriter(os.Stdout)
	defer bw.Flush()
	for i := 0; sc.Scan(); i++ {
		s := sc.Bytes()
		// ignore a leading UTF-8 BOM, if present
		if i == 0 && hasPrefixBOM(s) {
			s = s[3:]
		}
		if match(s, exprs) {
			bw.Write(s)
			bw.WriteByte('\n')
			if !liveLines {
				continue
			}
			// a failed flush usually means stdout was closed along a pipe
			if err := bw.Flush(); err != nil {
				return
			}
		}
	}
}

const ncolInfo = ` ncol [options...] [filenames...] Nice COLumns realigns and styles data tables using ANSI color sequences. In particular, all auto-detected numbers are styled so they're easier to read at a glance. Input tables can be either lines of space-separated values or tab-separated values, and are auto-detected using the first non-empty line. When not given filepaths to read data from, this tool reads from standard input by default. The options are, available both in single and double-dash versions -h show this help message -help show this help message -no-sums avoid showing a final row with column sums -unsummed avoid showing a final row with column sums -no-tiles avoid showing color-coded tiles at the start of lines -untiled avoid showing color-coded tiles at the start of lines `

// columnGap is how many spaces separate adjacent columns
const columnGap = 2

// altDigitStyle is used to make 4+ digit-runs easier to read
const altDigitStyle = "\x1b[38;2;168;168;168m"

// ncolMain is the entry point for the `ncol` tool: it handles the leading
// options, then realigns and color-styles the input table(s)
func ncolMain() {
	sums := true
	tiles := true
	args := os.Args[1:]
out:
	for len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(ncolInfo[1:])
			return
		case `-no-sums`, `--no-sums`, `-no-totals`, `--no-totals`,
			`-unsummed`, `--unsummed`, `-untotaled`, `--untotaled`,
			`-untotalled`, `--untotalled`:
			sums = false
			args = args[1:]
			continue
		case `-no-tiles`, `--no-tiles`, `-untiled`, `--untiled`:
			tiles = false
			args = args[1:]
			continue
		default:
			break out
		}
	}
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}
	var res table
	res.ShowTiles = tiles
	res.ShowSums = sums
	if err := ncolRun(args, &res); err != nil {
		os.Stderr.WriteString(err.Error())
os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// table has all summary info gathered from the data, along with the rows
// themselves, stored as lines/strings
type table struct {
	// Columns is the column count, taken from the first non-empty line
	Columns int
	// Rows keeps every non-empty input line verbatim
	Rows []string
	// MaxWidth is the widest rune-count seen, per column
	MaxWidth []int
	// MaxDotDecimals is the most post-dot decimals seen, per column
	MaxDotDecimals []int
	// Numeric counts numeric-looking items, per column
	Numeric []int
	// Sums accumulates numeric values, per column
	Sums []float64
	// LoopItems splits lines either by tabs or by space-runs, picked when
	// the first non-empty line is seen
	LoopItems func(line string, items int, t *table, f itemFunc) int
	// sb is scratch space reused by checkNumeric to strip escape sequences
	sb strings.Builder
	ShowTiles bool
	ShowSums bool
}

// itemFunc is the per-item callback type used with table.LoopItems
type itemFunc func(i int, s string, t *table)

// ncolRun gathers stats from all inputs (stdin when no paths are given),
// then renders the whole realigned table at once
func ncolRun(paths []string, res *table) error {
	for _, p := range paths {
		if err := ncolHandleFile(res, p); err != nil {
			return err
		}
	}
	if len(paths) == 0 {
		if err := ncolHandleReader(res, os.Stdin); err != nil {
			return err
		}
	}
	bw := bufio.NewWriterSize(os.Stdout, bufSize)
	defer bw.Flush()
	ncol(bw, res)
	return nil
}

// ncolHandleFile opens the named file and gathers its lines/stats into res
func ncolHandleFile(res *table, path string) error {
	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return ncolHandleReader(res, f)
}

// ncolHandleReader gathers lines and per-column stats from one input,
// skipping empty lines; the first non-empty line picks the separator kind
// and fixes the column count
func ncolHandleReader(t *table, r io.Reader) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, maxLineBufSize)
	for i := 0; sc.Scan(); i++ {
		s := sc.Text()
		// ignore a leading UTF-8 BOM, if present
		if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
			s = s[3:]
		}
		if len(s) == 0 {
			continue
		}
		t.Rows = append(t.Rows, s)
		if t.Columns == 0 {
			if t.LoopItems == nil {
				if strings.IndexByte(s, '\t') >= 0 {
					t.LoopItems = loopItemsTSV
				} else {
					t.LoopItems = loopItemsSSV
				}
			}
			const maxInt = int(^uint(0) >> 1)
			// count the items on the first non-empty line
			t.Columns = t.LoopItems(s, maxInt, t, doNothing)
		}
		t.LoopItems(s, t.Columns, t, ncolUpdateItem)
	}
	return sc.Err()
}

// doNothing is given to LoopItems to count items, while doing nothing else
func doNothing(i int, s string, t *table) {
}

// ncolUpdateItem updates per-column widths, decimal counts, numeric counts,
// and sums for a single item
func ncolUpdateItem(i int, s string, t *table) {
	// ensure column-info-slices have enough room
	if i >= len(t.MaxWidth) {
		t.MaxWidth = append(t.MaxWidth, 0)
		t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
		t.Numeric = append(t.Numeric, 0)
		t.Sums = append(t.Sums, 0)
	}
	// keep track of widest rune-counts for each column
	w := countWidth(s)
	if t.MaxWidth[i] < w {
		t.MaxWidth[i] = w
	}
	// update stats for numeric items
	if checkNumeric(s, &(t.sb)) {
		dd := countDotDecimals(s)
		if t.MaxDotDecimals[i] < dd {
			t.MaxDotDecimals[i] = dd
		}
		t.Numeric[i]++
		// t.sb holds the item with escape sequences stripped
		f, _ := strconv.ParseFloat(t.sb.String(), 64)
		t.Sums[i] += f
	}
}

// loopItemsSSV loops over a line's items, allocation-free style; when given
// empty strings, the callback func is never called
func loopItemsSSV(s string, max int, t *table, f itemFunc) int {
	i := 0
	s = trimTrailingSpaces(s)
	for {
		s = trimLeadingSpaces(s)
		if len(s) == 0 {
			return i
		}
		if i+1 == max {
			// last allowed item swallows the rest of the line
			f(i, s, t)
			return i + 1
		}
		j := strings.IndexByte(s, ' ')
		if j < 0 {
			f(i, s, t)
			return i + 1
		}
		f(i, s[:j], t)
		s = s[j+1:]
		i++
	}
}

// loopItemsTSV loops over a line's tab-separated items, allocation-free style;
// when given empty strings, the callback func is never called
func loopItemsTSV(s string, max int, t *table, f itemFunc) int {
	if len(s) == 0 {
		return 0
	}
	i := 0
	for {
		if i+1 == max {
			// last allowed item swallows the rest of the line
			f(i, s, t)
			return i + 1
		}
		j := strings.IndexByte(s, '\t')
		if j < 0 {
			f(i, s, t)
			return i + 1
		}
		f(i, s[:j], t)
		s = s[j+1:]
		i++
	}
}

// checkNumeric checks if a string is valid/useable as a number, while also
// collecting its escape-free bytes into sb, ready for strconv.ParseFloat
func checkNumeric(s string, sb *strings.Builder) bool {
	if len(s) == 0 {
		return false
	}
	sb.Reset()
	s = skipLeadingEscapeSequences(s)
	// optional leading sign
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		sb.WriteByte(s[0])
		s = s[1:]
	}
	s = skipLeadingEscapeSequences(s)
	if len(s) == 0 {
		return false
	}
	// numbers like `.5` are allowed
	if b := s[0]; b == '.' {
		sb.WriteByte(b)
		return checkDigits(s[1:], sb)
	}
	digits := 0
	for {
		s = skipLeadingEscapeSequences(s)
		if len(s) == 0 {
			break
		}
		b := s[0]
		sb.WriteByte(b)
		if b == '.' {
			// the dot was already written to sb above
			return checkDigits(s[1:], sb)
		}
		if !('0' <= b && b <= '9') {
			return false
		}
		digits++
		s = s[1:]
	}
	s = skipLeadingEscapeSequences(s)
	return len(s) == 0 && digits > 0
}

// checkDigits checks the post-dot part of a number: escape sequences are
// skipped, and at least one digit is required
func checkDigits(s string, sb *strings.Builder) bool {
	if len(s) == 0 {
		return false
	}
	digits := 0
	for {
		s = skipLeadingEscapeSequences(s)
		if len(s) == 0 {
			break
		}
		if b := s[0]; '0' <= b && b <= '9' {
			sb.WriteByte(b)
			s = s[1:]
			digits++
		} else {
			return false
		}
	}
	s = skipLeadingEscapeSequences(s)
	return len(s) == 0 && digits > 0
}

// ncol renders the whole gathered table: optional per-item tiles, realigned
// items with numbers styled/right-aligned, and an optional final sums row
func ncol(w *bufio.Writer, t *table) {
	// make sums row first, as final alignments are usually affected by these
	var sums []string
	if t.ShowSums {
		sums = make([]string, 0, t.Columns)
	}
	for i := 0; i < t.Columns && t.ShowSums; i++ {
		s := `-`
		width := 1
		if t.Numeric[i] > 0 {
			decs := t.MaxDotDecimals[i]
			if decs > 0 {
				// countDotDecimals seems to count the dot itself along with
				// the decimals, hence the adjustment — TODO confirm
				decs--
			}
			var buf [64]byte
			s = string(strconv.AppendFloat(buf[:0], t.Sums[i], 'f', decs, 64))
			width = len(s)
		}
		// sums can widen their columns
		if t.MaxWidth[i] < width {
			t.MaxWidth[i] = width
		}
		sums = append(sums, s)
	}
	// due counts spaces owed but not yet emitted, which avoids trailing
	// spaces at the end of lines
	due := 0
	showItem := func(i int, s string, t *table) {
		if i > 0 {
			due += columnGap
		}
		if checkNumeric(s, &(t.sb)) {
			// right-align numbers on their dot position
			dd := countDotDecimals(s)
			rpad := t.MaxDotDecimals[i] - dd
			width := countWidth(s)
			lpad := t.MaxWidth[i] - (width + rpad) + due
			writeSpaces(w, lpad)
			f, _ := strconv.ParseFloat(t.sb.String(), 64)
			writeNumericItem(w, s, numericStyle(f))
			due = rpad
			return
		}
		// non-numbers are left-aligned
		writeSpaces(w, due)
		w.WriteString(s)
		due = t.MaxWidth[i] - countWidth(s)
	}
	writeTile := func(i int, s string, t *table) {
		if len(s) == 0 {
			w.WriteString("\x1b[0m○")
			return
		}
		if checkNumeric(s, &(t.sb)) {
			f, _ := strconv.ParseFloat(t.sb.String(), 64)
			w.WriteString(numericStyle(f))
			w.WriteString("■")
			return
		}
		// yellow-ish tiles warn about leading/trailing spaces
		if s[0] == ' ' || s[len(s)-1] == ' ' {
			w.WriteString("\x1b[38;2;196;160;0m■")
			return
		}
		w.WriteString("\x1b[38;2;128;128;128m■")
	}
	// show realigned rows
	for _, line := range t.Rows {
		due = 0
		if t.ShowTiles {
			end := t.LoopItems(line, t.Columns, t, writeTile)
			if end < len(t.MaxWidth)-1 {
				w.WriteString("\x1b[0m")
			}
			// mark missing trailing items with × tiles
			// NOTE(review): this loop starts at end, while the seemingly
			// unused writeRowTiles below starts at end+1 — one of the two
			// looks off by one; confirm which is intended
			for i := end; i < len(t.MaxWidth); i++ {
				w.WriteString("×")
			}
			w.WriteString("\x1b[0m")
			due += columnGap
		}
		t.LoopItems(line, t.Columns, t, showItem)
		// assume write errors come from closed stdout pipes
		if w.WriteByte('\n') != nil {
			return
		}
	}
	if t.Columns > 0 && t.ShowSums {
		realignSums(w, t, sums)
	}
}

// realignSums renders the final row with column sums, right-aligning and
// styling sums for numeric columns, and showing `-` for the rest
func realignSums(w *bufio.Writer, t *table, sums []string) {
	due := 0
	if t.ShowTiles {
		// account for the tiles column at the start of every other line
		due += t.Columns + columnGap
	}
	for i, s := range sums {
		if i > 0 {
			due += columnGap
		}
		if t.Numeric[i] == 0 {
			writeSpaces(w, due)
			w.WriteString(s)
			due = t.MaxWidth[i] - countWidth(s)
			continue
		}
		lpad := t.MaxWidth[i] - len(s) + due
		writeSpaces(w, lpad)
		writeNumericItem(w, s, numericStyle(t.Sums[i]))
		due = 0
	}
	w.WriteByte('\n')
}

// writeRowTiles emits one tile per item, then × marks for missing items
// NOTE(review): nothing in this chunk calls this func — ncol inlines a
// near-identical loop instead, starting its × loop at end rather than end+1
func writeRowTiles(w *bufio.Writer, s string, t *table, writeTile itemFunc) {
	end := t.LoopItems(s, t.Columns, t, writeTile)
	if end < len(t.MaxWidth)-1 {
		w.WriteString("\x1b[0m")
	}
	for i := end + 1; i < len(t.MaxWidth); i++ {
		w.WriteString("×")
	}
	w.WriteString("\x1b[0m")
}

// numericStyle picks a color for a number: greens for positives, reds for
// negatives, blue for zero; whole numbers get the stronger shade
func numericStyle(f float64) string {
	if f > 0 {
		if float64(int64(f)) == f {
			return "\x1b[38;2;0;135;0m"
		}
		return "\x1b[38;2;0;155;95m"
	}
	if f < 0 {
		if float64(int64(f)) == f {
			return "\x1b[38;2;204;0;0m"
		}
		return "\x1b[38;2;215;95;95m"
	}
	if f == 0 {
		return "\x1b[38;2;0;95;215m"
	}
	// NaN compares false against everything, so it ends up here
	return "\x1b[38;2;128;128;128m"
}

// writeNumericItem renders a numeric item, alternating 3-digit group styles
// on both sides of the dot, when those digit-runs are long enough
func writeNumericItem(w *bufio.Writer, s string, startStyle string) {
	w.WriteString(startStyle)
	if len(s) > 0 && (s[0] == '-' || s[0] == '+') {
		w.WriteByte(s[0])
		s = s[1:]
	}
	dot := strings.IndexByte(s, '.')
	if dot < 0 {
		restyleDigits(w, s, altDigitStyle)
		w.WriteString("\x1b[0m")
		return
	}
	if len(s[:dot]) > 3 {
		restyleDigits(w, s[:dot], altDigitStyle)
		w.WriteString("\x1b[0m")
		w.WriteString(startStyle)
		w.WriteByte('.')
	} else {
		w.WriteString(s[:dot])
		w.WriteByte('.')
	}
	rest := s[dot+1:]
	restyleDigits(w, rest, altDigitStyle)
	if len(rest) < 4 {
		w.WriteString("\x1b[0m")
	}
}

// restyleDigits renders a run of digits as alternating styled/unstyled runs
// of 3 digits, which greatly improves readability, and is the only purpose
// of this app;
` // indent is how many spaces each indentation level uses const njsonIndent = 2 const ( // njsonBoolStyle is bluish, and very distinct from all other colors used njsonBoolStyle = "\x1b[38;2;95;175;215m" // njsonKeyStyle is magenta, and very distinct from normal strings njsonKeyStyle = "\x1b[38;2;135;95;255m" // njsonNullStyle is a light-gray, just like syntax elements, but the word // `null` is wide enough to stand out from syntax items at a glance njsonNullStyle = njsonSyntaxStyle // njsonPositiveNumberStyle is a nice green njsonPositiveNumberStyle = "\x1b[38;2;0;135;95m" // njsonNegativeNumberStyle is a nice red njsonNegativeNumberStyle = "\x1b[38;2;204;0;0m" // njsonZeroNumberStyle is a nice blue njsonZeroNumberStyle = "\x1b[38;2;0;95;215m" // njsonStringStyle used to be bluish, but it's better to keep it plain, // which also minimizes how many different colors the output can show njsonStringStyle = `` // njsonSyntaxStyle is a light-gray, not too light, not too dark njsonSyntaxStyle = "\x1b[38;2;168;168;168m" ) func njsonMain() { args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(himaInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } if len(args) > 1 { handleError(nil, errors.New(`multiple inputs not allowed`)) return } // figure out whether input should come from a named file or from stdin var paths [1]string paths[0] = `-` if len(args) > 0 { paths[0] = args[0] } easyboxRun(paths[:], func(w *bufio.Writer, r io.Reader, name string) error { return njson(w, r) }) } func njson(w *bufio.Writer, r io.Reader) error { dec := json.NewDecoder(r) // using string-like json.Number values instead of float64 ones avoids // unneeded reformatting of numbers; reformatting parsed float64 values // can potentially even drop/change decimals, causing the output not to // match the input digits exactly, which is best to avoid dec.UseNumber() t, err := dec.Token() if err == io.EOF { return 
errors.New(`empty input isn't valid JSON`) } if err != nil { return err } if err := njsonHandleToken(w, dec, t, 0, 0); err != nil { return err } // don't forget to end the last output line w.WriteByte('\n') if _, err := dec.Token(); err != io.EOF { return errors.New(`unexpected trailing JSON data`) } return nil } func njsonHandleToken(w *bufio.Writer, d *json.Decoder, t json.Token, pre, level int) error { switch t := t.(type) { case json.Delim: switch t { case json.Delim('['): return njsonHandleArray(w, d, pre, level) case json.Delim('{'): return njsonHandleObject(w, d, pre, level) default: // return fmt.Errorf(`unsupported JSON delimiter %v`, t) return errors.New(`unsupported JSON delimiter`) } case nil: writeSpaces(w, njsonIndent*pre) w.WriteString(njsonNullStyle + "null\x1b[0m") return nil case bool: writeSpaces(w, njsonIndent*pre) if t { w.WriteString(njsonBoolStyle + "true\x1b[0m") } else { w.WriteString(njsonBoolStyle + "false\x1b[0m") } return nil case string: return njsonHandleString(w, t, pre) case json.Number: return njsonHandleNumber(w, t, pre) default: // return fmt.Errorf(`unsupported token type %T`, t) return errors.New(`unsupported token type`) } } func njsonHandleArray(w *bufio.Writer, d *json.Decoder, pre, level int) error { for i := 0; true; i++ { t, err := d.Token() if err != nil { return err } if t == json.Delim(']') { if i == 0 { writeSpaces(w, njsonIndent*pre) w.WriteString(njsonSyntaxStyle + "[]\x1b[0m") } else { w.WriteString("\n") writeSpaces(w, njsonIndent*level) w.WriteString(njsonSyntaxStyle + "]\x1b[0m") } return nil } if i == 0 { writeSpaces(w, njsonIndent*pre) w.WriteString(njsonSyntaxStyle + "[\x1b[0m\n") } else { // this is a good spot to check for early-quit opportunities w.WriteString(njsonSyntaxStyle + ",\x1b[0m\n") if err := w.Flush(); err != nil { // a write error may be the consequence of stdout being closed, // perhaps by another app along a pipe return errNoMoreOutput } } if err := njsonHandleToken(w, d, t, level+1, 
level+1); err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// njsonHandleKey emits an object key, with the quotes styled as syntax and
// the key text in the dedicated key color
// NOTE(review): unlike njsonHandleString, the key is emitted verbatim, with
// no escaping via escapedStringBytes — confirm whether that's intended
func njsonHandleKey(w *bufio.Writer, s string, pre int) error {
	writeSpaces(w, njsonIndent*pre)
	w.WriteString(njsonSyntaxStyle + "\"\x1b[0m" + njsonKeyStyle)
	w.WriteString(s)
	w.WriteString(njsonSyntaxStyle + "\":\x1b[0m ")
	return nil
}

// njsonHandleNumber emits a number verbatim, color-picked by its sign
func njsonHandleNumber(w *bufio.Writer, n json.Number, pre int) error {
	writeSpaces(w, njsonIndent*pre)
	// the parsed value is only used to pick a color; the digits shown come
	// from the original token
	f, _ := n.Float64()
	if f > 0 {
		w.WriteString(njsonPositiveNumberStyle)
	} else if f < 0 {
		w.WriteString(njsonNegativeNumberStyle)
	} else {
		w.WriteString(njsonZeroNumberStyle)
	}
	w.WriteString(n.String())
	w.WriteString("\x1b[0m")
	return nil
}

// njsonHandleObject renders an object: empty ones show as `{}` on one line,
// while the rest get one line per key-value pair
func njsonHandleObject(w *bufio.Writer, d *json.Decoder, pre, level int) error {
	for i := 0; true; i++ {
		t, err := d.Token()
		if err != nil {
			return err
		}
		if t == json.Delim('}') {
			if i == 0 {
				writeSpaces(w, njsonIndent*pre)
				w.WriteString(njsonSyntaxStyle + "{}\x1b[0m")
			} else {
				w.WriteString("\n")
				writeSpaces(w, njsonIndent*level)
				w.WriteString(njsonSyntaxStyle + "}\x1b[0m")
			}
			return nil
		}
		if i == 0 {
			writeSpaces(w, njsonIndent*pre)
			w.WriteString(njsonSyntaxStyle + "{\x1b[0m\n")
		} else {
			// this is a good spot to check for early-quit opportunities
			w.WriteString(njsonSyntaxStyle + ",\x1b[0m\n")
			if err := w.Flush(); err != nil {
				// a write error may be the consequence of stdout being closed,
				// perhaps by another app along a pipe
				return errNoMoreOutput
			}
		}
		// the stdlib's JSON parser is supposed to complain about non-string
		// keys anyway, but make sure just in case
		k, ok := t.(string)
		if !ok {
			return errors.New(`expected key to be a string`)
		}
		if err := njsonHandleKey(w, k, level+1); err != nil {
			return err
		}
		// handle value
		t, err = d.Token()
		if err != nil {
			return err
		}
		// pre is 0 here, since the value continues the key's own line
		if err := njsonHandleToken(w, d, t, 0, level+1); err != nil {
			return err
		}
	}
	// make the compiler happy
	return nil
}

// njsonHandleString emits a quoted/escaped string, with the quotes styled
// as syntax
func njsonHandleString(w *bufio.Writer, s string, pre int) error {
	writeSpaces(w, njsonIndent*pre)
	w.WriteString(njsonSyntaxStyle +
"\"\x1b[0m" + njsonStringStyle) for i := range s { w.Write(escapedStringBytes[s[i]]) } w.WriteString(njsonSyntaxStyle + "\"\x1b[0m") return nil } const nnInfo = ` nn [options...] [file...] Nice Numbers is an app which renders the UTF-8 text it's given to make long numbers much easier to read. It does so by alternating 3-digit groups which are colored using ANSI-codes with plain/unstyled 3-digit groups. Unlike the common practice of inserting commas between 3-digit groups, this trick doesn't widen the original text, keeping alignments across lines the same. Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line feeds. All (optional) leading options start with either single or double-dash, and most of them change the style/color used. Some of the options are, shown in their single-dash form: -h show this help message -help show this help message -b use a blue color -blue use a blue color -bold bold-style digits -g use a green color -gray use a gray color (default) -green use a green color -hi use a highlighting/inverse style -m use a magenta color -magenta use a magenta color -o use an orange color -orange use an orange color -r use a red color -red use a red color -u underline digits -underline underline digits ` func nnMain() { args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(nnInfo[1:]) return } } options := true if len(args) > 0 && args[0] == `--` { options = false args = args[1:] } style, _ := lookupStyle(`gray`) // if the first argument is 1 or 2 dashes followed by a supported // style-name, change the style used if options && len(args) > 0 && strings.HasPrefix(args[0], `-`) { name := args[0] name = strings.TrimPrefix(name, `-`) name = strings.TrimPrefix(name, `-`) args = args[1:] // check if the `dedashed` argument is a supported style-name if s, ok := lookupStyle(name); ok { style = s } else { os.Stderr.WriteString(`invalid style name `) os.Stderr.WriteString(name) 
os.Stderr.WriteString("\n") os.Exit(1) } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return restyle(w, r, style) }) } func restyle(w *bufio.Writer, r io.Reader, style string) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Text() if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") { s = s[3:] } restyleLine(w, s, style) w.WriteByte('\n') if err := w.Flush(); err != nil { // a write error may be the consequence of stdout being closed, // perhaps by another app along a pipe return errNoMoreOutput } } return sc.Err() } func lookupStyle(name string) (style string, ok bool) { if alias, ok := styleAliases[name]; ok { name = alias } style, ok = styles[name] return style, ok } // restyleLine renders the line given, using ANSI-styles to make any long // numbers in it more legible; this func doesn't emit a line-feed, which // is up to its caller func restyleLine(w *bufio.Writer, line string, style string) { for len(line) > 0 { i := indexDigit(line) if i < 0 { // no (more) digits to style for sure w.WriteString(line) return } // emit line before current digit-run w.WriteString(line[:i]) // advance to the start of the current digit-run line = line[i:] // see where the digit-run ends j := indexNonDigit(line) if j < 0 { // the digit-run goes until the end restyleDigits(w, line, style) return } // emit styled digit-run restyleDigits(w, line[:j], style) // skip right past the end of the digit-run line = line[j:] } } const plainInfo = ` plain [options...] [file...] Turn potentially ANSI-styled plain-text into actual plain-text. Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line feeds. 
All (optional) leading options start with either single or double-dash: -h show this help message -help show this help message `

// plainMain is the entry point for the `plain` tool: it handles the leading
// options, then strips ANSI escape sequences from all inputs
func plainMain() {
	buffered := false
	args := os.Args[1:]
	if len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(plainInfo[1:])
			return
		}
	}
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}
	// emit/flush lines as soon as they're ready, except when buffering was
	// asked for, or when stdout is seekable (i.e. redirected to a file)
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}
	easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error {
		return plain(w, r, liveLines)
	})
}

// plain copies lines from r to w, dropping any escape sequences found via
// indexEscapeSequence; when live is true, output is flushed per line
func plain(w *bufio.Writer, r io.Reader, live bool) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, maxLineBufSize)
	for i := 0; sc.Scan(); i++ {
		s := sc.Bytes()
		// ignore a leading UTF-8 BOM, if present
		if i == 0 && hasPrefixBOM(s) {
			s = s[3:]
		}
		for line := s; len(line) > 0; {
			// i..j span the next escape sequence, if any
			i, j := indexEscapeSequence(line)
			if i < 0 {
				// no (more) escape sequences on this line
				w.Write(line)
				break
			}
			if j < 0 {
				// unterminated sequence: drop the rest of the line
				j = len(line)
			}
			if i > 0 {
				// emit the text before the escape sequence
				w.Write(line[:i])
			}
			line = line[j:]
		}
		// assume write errors come from closed stdout pipes
		if w.WriteByte('\n') != nil {
			return errNoMoreOutput
		}
		if !live {
			continue
		}
		if err := w.Flush(); err != nil {
			return errNoMoreOutput
		}
	}
	return sc.Err()
}

const primesInfo = ` primes [options...] [count...] Show the first few prime numbers, starting from the lowest and showing one per line. When not given how many primes to find, the default is 1 million. All (optional) leading options start with either single or double-dash: -h show this help message -help show this help message `

// primesMain is the entry point for the `primes` tool: the optional first
// argument overrides how many primes to show
func primesMain() {
	howMany := 1_000_000
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(primesInfo[1:])
			return
		}
		n, err := strconv.Atoi(os.Args[1])
		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			os.Exit(1)
		}
		// negative counts are treated as 0
		if n < 0 {
			n = 0
		}
		howMany = n
	}
	primes(howMany)
}

// primes emits the first `left` primes, one per line, via trial division
func primes(left int) {
	bw := bufio.NewWriterSize(os.Stdout, bufSize)
	defer bw.Flush()
	// 24 bytes are always enough for any 64-bit integer
	var buf [24]byte
	// 2 is the only even prime number
	if left > 0 {
		bw.WriteString("2\n")
		left--
	}
	// from here on, only odd candidates need checking
	for n := uint64(3); left > 0; n += 2 {
		if oddPrime(n) {
			bw.Write(strconv.AppendUint(buf[:0], n, 10))
			if err := bw.WriteByte('\n'); err != nil {
				// assume errors come from closed stdout pipes
				return
			}
			left--
		}
	}
}

// oddPrime assumes the number given to it is odd
func oddPrime(n uint64) bool {
	// only odd divisors up to the square root need checking
	max := uint64(math.Sqrt(float64(n)))
	for div := uint64(3); div <= max; div += 2 {
		if n%div == 0 {
			return false
		}
	}
	return true
}

const realignInfo = ` realign [options...] [filenames...] Realign all detected columns, right-aligning any detected numbers in any column. ANSI style-codes are also kept as given. The only option available is to show this help message, using any of "-h", "--h", "-help", or "--help", without the quotes. 
`

// realignMain is the entry point for the `realign` tool
func realignMain() {
	args := os.Args[1:]
	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(realignInfo[1:])
			return
		case `--`:
			args = args[1:]
		}
	}
	if err := realignRun(args); err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// realignRun gathers lines/stats from all inputs (stdin when no paths are
// given), then renders the whole realigned table at once
func realignRun(paths []string) error {
	var res table
	for _, p := range paths {
		if err := realignHandleFile(&res, p); err != nil {
			return err
		}
	}
	if len(paths) == 0 {
		if err := realignHandleReader(&res, os.Stdin); err != nil {
			return err
		}
	}
	bw := bufio.NewWriterSize(os.Stdout, bufSize)
	defer bw.Flush()
	realign(bw, res)
	return nil
}

// realignHandleFile opens the named file and gathers its lines/stats into res
func realignHandleFile(res *table, path string) error {
	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return realignHandleReader(res, f)
}

// realignHandleReader gathers lines and per-column stats from one input;
// unlike ncol, interior empty lines are kept (only leading ones are dropped)
func realignHandleReader(t *table, r io.Reader) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, maxLineBufSize)
	for i := 0; sc.Scan(); i++ {
		s := sc.Text()
		// ignore a leading UTF-8 BOM, if present
		if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
			s = s[3:]
		}
		if len(s) == 0 {
			// keep empty lines, but only after the first non-empty one
			if len(t.Rows) > 0 {
				t.Rows = append(t.Rows, ``)
			}
			continue
		}
		t.Rows = append(t.Rows, s)
		if t.Columns == 0 {
			if t.LoopItems == nil {
				if strings.IndexByte(s, '\t') >= 0 {
					t.LoopItems = loopItemsTSV
				} else {
					t.LoopItems = loopItemsSSV
				}
			}
			const maxInt = int(^uint(0) >> 1)
			t.LoopItems(s, maxInt, t, updateColumnCount)
		}
		t.LoopItems(s, t.Columns, t, realignUpdateItem)
	}
	return sc.Err()
}

// updateColumnCount records the highest item index seen, plus one
func updateColumnCount(i int, s string, t *table) {
	t.Columns = i + 1
}

// realignUpdateItem updates per-column widths and decimal counts for a
// single item
func realignUpdateItem(i int, s string, t *table) {
	// ensure column-info-slices have enough room
	if i >= len(t.MaxWidth) {
		t.MaxWidth = append(t.MaxWidth, 0)
		t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
	}
	// keep track of widest rune-counts for each column
	w := countWidth(s)
	if t.MaxWidth[i] < w {
		t.MaxWidth[i] = w
	}
	// update stats for numeric items
	if isNumeric(s) {
		dd := countDotDecimals(s)
		if t.MaxDotDecimals[i] < dd {
			t.MaxDotDecimals[i] = dd
		}
	}
}

// isNumeric checks if a string is valid/useable as a number
func isNumeric(s string) bool {
	if len(s) == 0 {
		return false
	}
	s = skipLeadingEscapeSequences(s)
	// optional leading sign
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}
	s = skipLeadingEscapeSequences(s)
	if len(s) == 0 {
		return false
	}
	// numbers like `.5` are allowed
	if s[0] == '.' {
		return isDigits(s[1:])
	}
	digits := 0
	for {
		s = skipLeadingEscapeSequences(s)
		if len(s) == 0 {
			break
		}
		if s[0] == '.' {
			return isDigits(s[1:])
		}
		if !('0' <= s[0] && s[0] <= '9') {
			return false
		}
		digits++
		s = s[1:]
	}
	s = skipLeadingEscapeSequences(s)
	return len(s) == 0 && digits > 0
}

// isDigits checks the post-dot part of a number: escape sequences are
// skipped, and at least one digit is required
func isDigits(s string) bool {
	if len(s) == 0 {
		return false
	}
	digits := 0
	for {
		s = skipLeadingEscapeSequences(s)
		if len(s) == 0 {
			break
		}
		if '0' <= s[0] && s[0] <= '9' {
			s = s[1:]
			digits++
		} else {
			return false
		}
	}
	s = skipLeadingEscapeSequences(s)
	return len(s) == 0 && digits > 0
}

// realign renders all gathered rows, left-aligning text and right-aligning
// numbers on their dot position; empty lines stay empty
func realign(w *bufio.Writer, t table) {
	// due counts spaces owed but not yet emitted, which avoids trailing
	// spaces at the end of lines
	due := 0
	showItem := func(i int, s string, t *table) {
		if i > 0 {
			due += 2
		}
		if isNumeric(s) {
			dd := countDotDecimals(s)
			rpad := t.MaxDotDecimals[i] - dd
			width := countWidth(s)
			lpad := t.MaxWidth[i] - (width + rpad) + due
			writeSpaces(w, lpad)
			w.WriteString(s)
			due = rpad
			return
		}
		writeSpaces(w, due)
		w.WriteString(s)
		due = t.MaxWidth[i] - countWidth(s)
	}
	for _, line := range t.Rows {
		due = 0
		if len(line) > 0 {
			t.LoopItems(line, t.Columns, &t, showItem)
		}
		// assume write errors come from closed stdout pipes
		if w.WriteByte('\n') != nil {
			break
		}
	}
}

const squeezeInfo = ` squeeze [filenames...] Ignore leading/trailing spaces (and carriage-returns) on lines, also turning all runs of multiple consecutive spaces into single spaces. Spaces around tabs are ignored as well. 
` func squeezeMain() { buffered := false args := os.Args[1:] if len(args) > 0 { switch args[0] { case `-b`, `--b`, `-buffered`, `--buffered`: buffered = true args = args[1:] case `-h`, `--h`, `-help`, `--help`: os.Stdout.WriteString(squeezeInfo[1:]) return } } if len(args) > 0 && args[0] == `--` { args = args[1:] } liveLines := !buffered if !buffered { if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { liveLines = false } } easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error { return squeeze(w, r, liveLines) }) } func squeeze(w *bufio.Writer, r io.Reader, live bool) error { sc := bufio.NewScanner(r) sc.Buffer(nil, maxLineBufSize) for i := 0; sc.Scan(); i++ { s := sc.Bytes() if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) { s = s[3:] } writeSqueezed(w, s) if w.WriteByte('\n') != nil { return errNoMoreOutput } if !live { continue } if err := w.Flush(); err != nil { return errNoMoreOutput } } return sc.Err() } func writeSqueezed(w *bufio.Writer, s []byte) { // ignore leading spaces for len(s) > 0 && s[0] == ' ' { s = s[1:] } // ignore trailing spaces for len(s) > 0 && s[len(s)-1] == ' ' { s = s[:len(s)-1] } i := 0 space := false for i < len(s) { switch b := s[i]; b { case ' ': space = true i++ case '\t': space = false i++ for i < len(s) && s[i] == ' ' { i++ } w.WriteByte('\t') default: if space { w.WriteByte(' ') space = false } w.WriteByte(b) } } } const tcatlInfo = ` tcatl [options...] [file...] Title and Concatenate lines emits lines from all the named sources given, preceding each file's contents with its name, using an ANSI reverse style. The name "-" stands for the standard input. When no names are given, the standard input is used by default. 
All (optional) leading options start with either single or double-dash:

    -h      show this help message
    -help   show this help message
`

func tcatlMain() {
	args := os.Args[1:]
	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(tcatlInfo[1:])
			return
		}
	}
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error {
		// show each source's name padded to 80 columns in reverse video
		fmt.Fprintf(w, "\x1b[7m%-80s\x1b[0m\n", name)
		return catl(w, r, false)
	})
}

const utfateInfo = `
utfate [options...] [file...]

This app turns plain-text input into UTF-8. Supported input formats are

    - ASCII
    - UTF-8
    - UTF-8 with a leading BOM
    - UTF-16 BE
    - UTF-16 LE
    - UTF-32 BE
    - UTF-32 LE

All (optional) leading options start with either single or double-dash:

    -h      show this help message
    -help   show this help message
`

func utfateMain() {
	args := os.Args[1:]
	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			// fixed: this previously emitted fixlinesInfo, which showed the
			// wrong tool's help message; the duplicate os.Args check that
			// compensated for it is gone too
			os.Stdout.WriteString(utfateInfo[1:])
			return
		}
	}
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	easyboxRun(args, func(w *bufio.Writer, r io.Reader, name string) error {
		return utfate(w, r)
	})
}

// utfate re-encodes plain-text input into UTF-8, auto-detecting the source
// encoding from its leading byte-order mark; input without a BOM is copied
// as is, after checking it's valid UTF-8.
func utfate(w io.Writer, r io.Reader) error {
	br := bufio.NewReaderSize(r, bufSize)
	bw := bufio.NewWriterSize(w, bufSize)
	defer bw.Flush()

	// 4 bytes are enough to tell all supported BOMs apart; check the
	// UTF-32 marks first, since the UTF-16 LE one is a prefix of the
	// UTF-32 LE one
	lead, err := br.Peek(4)
	if err != nil && err != io.EOF {
		return err
	}

	switch {
	case bytes.HasPrefix(lead, []byte{0x00, 0x00, 0xfe, 0xff}):
		br.Discard(4)
		return utf32toUTF8(bw, br, binary.BigEndian)
	case bytes.HasPrefix(lead, []byte{0xff, 0xfe, 0x00, 0x00}):
		br.Discard(4)
		return utf32toUTF8(bw, br, binary.LittleEndian)
	case bytes.HasPrefix(lead, []byte{0xfe, 0xff}):
		br.Discard(2)
		return utf16toUTF8(bw, br, readBytePairBE)
	case bytes.HasPrefix(lead, []byte{0xff, 0xfe}):
		br.Discard(2)
		return utf16toUTF8(bw, br, readBytePairLE)
	case bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}):
		br.Discard(3)
		return handleUTF8(bw, br)
	default:
		return handleUTF8(bw, br)
	}
}

// handleUTF8 copies an already-UTF-8 stream, failing on invalid sequences.
func handleUTF8(w *bufio.Writer, r *bufio.Reader) error {
	for {
		c, size, err := r.ReadRune()
		// ReadRune signals invalid UTF-8 with (RuneError, 1): a genuine
		// U+FFFD in the input decodes with size 3, and is now kept as is,
		// instead of being misreported as an encoding error
		if c == unicode.ReplacementChar && size == 1 {
			return errors.New(`invalid UTF-8 stream`)
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		if _, err := w.WriteRune(c); err != nil {
			return errNoMoreOutput
		}
	}
}

// readPairFunc narrows source-code lines below
type readPairFunc func(*bufio.Reader) (byte, byte, error)

// utf16toUTF8 handles UTF-16 inputs for func utfate: read2 hides the
// stream's byte order, always yielding bytes in big-endian order
func utf16toUTF8(w *bufio.Writer, r *bufio.Reader, read2 readPairFunc) error {
	for {
		hi, lo, err := read2(r)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		c := rune(256*int(hi) + int(lo))
		if utf16.IsSurrogate(c) {
			// surrogates come in pairs, which decode into a single rune;
			// a stream ending on an unpaired surrogate drops it quietly
			hi, lo, err := read2(r)
			if err == io.EOF {
				return nil
			}
			if err != nil {
				return err
			}
			// DecodeRune yields U+FFFD for invalid surrogate pairs
			c = utf16.DecodeRune(c, rune(256*int(hi)+int(lo)))
		}

		if _, err := w.WriteRune(c); err != nil {
			return errNoMoreOutput
		}
	}
}

// readBytePairBE gets you a pair of bytes in big-endian (original) order
func readBytePairBE(br *bufio.Reader) (byte, byte, error) {
	a, err := br.ReadByte()
	if err != nil {
		return a, 0, err
	}
	b, err := br.ReadByte()
	return a, b, err
}

// readBytePairLE gets you a pair of bytes in little-endian order
func readBytePairLE(br *bufio.Reader) (byte, byte, error) {
	a, b, err := readBytePairBE(br)
	return b, a, err
}

// utf32toUTF8 handles UTF-32 inputs for func utfate, reading one 4-byte
// code point at a time in the given byte order.
func utf32toUTF8(w *bufio.Writer, r *bufio.Reader, o binary.ByteOrder) error {
	var n uint32
	for {
		err := binary.Read(r, o, &n)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			// a truncated final code point surfaces as io.ErrUnexpectedEOF
			return err
		}
		// WriteRune substitutes U+FFFD for out-of-range code points
		if _, err := w.WriteRune(rune(n)); err != nil {
			return errNoMoreOutput
		}
	}
}