/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath coby.go
*/

package main

import (
	"bufio"
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"sync"
)

// info is the help message; its leading line-feed is skipped when shown
const info = `
coby [options...] [files/folders...]


COunt BYtes finds out some simple byte-related stats, counting

    - bytes
    - lines
    - how many lines have trailing spaces (trails)
    - how many lines end with a CRLF pair
    - all-bits-off (null) bytes
    - all-bits-on (full) bytes
    - top-bit-on (high) bytes
    - which unicode byte-order-mark (bom) sequence the data start with

Some of these stats (lines, CRLFs, BOMs) only make sense for plain-text
data, and thus may not be meaningful for general binary data.

The output is TSV (tab-separated values) lines, where the first line has
all the column names.

When no filepaths are given, the standard input is used by default. All
folder names given expand recursively into all filenames in them. A mix
of files/folders is supported for convenience.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`

// maxEmptyReads limits how many consecutive reads returning no bytes and no
// error are tolerated, before giving up on an input with io.ErrNoProgress
const maxEmptyReads = 100

// header has all the values for the first output line, in the same order
// func showResult emits its columns
var header = []string{
	`name`,
	`bytes`,
	`lines`,
	`lf`,
	`crlf`,
	`spaces`,
	`tabs`,
	`trails`,
	`nulls`,
	`fulls`,
	`highs`,
	`bom`,
}

// event has what the output-reporting task needs to show the results of a
// task which has just completed, perhaps unsuccessfully
type event struct {
	// Index points to the task's entry in the results-slice
	Index int

	// Stats has all the byte-related stats
	Stats stats

	// Err is the completed task's error, or lack of
	Err error
}

// main handles the help option, shows the header line, finds all the files
// to check, and then runs the concurrent counting/reporting; the app's exit
// code is 1 when any path/input fails
func main() {
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `--`:
			args = args[1:]
		}
	}

	// show first/heading line right away, to let users know things are
	// happening
	for i, s := range header {
		if i > 0 {
			os.Stdout.WriteString("\t")
		}
		os.Stdout.WriteString(s)
	}

	// assume an error means later stages/apps in a pipe had enough input and
	// quit successfully, so quit successfully too
	_, err := os.Stdout.WriteString("\n")
	if err != nil {
		return
	}

	// names has all filepaths given, ignoring repetitions
	names, ok := findAllFiles(args)
	if !ok {
		os.Exit(1)
	}

	// standard input is the default only when no paths are given at all:
	// folders which turn out to have no files in them must not cause the
	// app to wait on standard input
	if len(args) == 0 {
		names = []string{`-`}
	}

	events := make(chan event)
	go handleInputs(names, events)
	if !handleOutput(os.Stdout, len(names), events) {
		os.Exit(1)
	}
}

// handleInputs launches all the tasks which do the actual work, limiting how
// many inputs are being worked on at the same time; the events channel is
// closed only after all tasks have sent their results
func handleInputs(names []string, events chan<- event) {
	defer close(events) // allow the output-reporter task to end

	var tasks sync.WaitGroup
	// the number of tasks is always known in advance
	tasks.Add(len(names))

	// permissions is buffered to limit concurrency to the core-count
	permissions := make(chan struct{}, runtime.NumCPU())
	defer close(permissions)

	for i, name := range names {
		// wait until some concurrency-room is available, before proceeding
		permissions <- struct{}{}

		go func(i int, name string) {
			defer tasks.Done()
			res, err := handleInput(name)
			// free the concurrency slot before the possibly-blocking send
			<-permissions
			events <- event{Index: i, Stats: res, Err: err}
		}(i, name)
	}

	// wait for all inputs, before closing the `events` channel, which in turn
	// would quit the whole app right away
	tasks.Wait()
}

// handleInput handles each work-item for func handleInputs: a dash means
// standard input, while anything else is a filepath to open; the stats
// returned always have their name and result-status set, even on failure
func handleInput(path string) (stats, error) {
	var res stats
	res.name = path

	if path == `-` {
		err := res.updateStats(os.Stdin)
		return res, err
	}

	f, err := os.Open(path)
	if err != nil {
		res.result = resultError
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return res, errors.New(`can't open file named ` + path)
	}
	defer f.Close()

	err = res.updateStats(f)
	return res, err
}

// handleOutput asynchronously updates output as results are known, whether
// it's errors or successful results; returns whether it succeeded, which
// means no errors happened
func handleOutput(w io.Writer, inputs int, events <-chan event) (ok bool) {
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	ok = true
	results := make([]stats, inputs)

	// keep track of which tasks are over, so that on each event all leading
	// results which are ready are shown: all of this ensures prompt output
	// updates as soon as results come in, while keeping the original order
	// of the names/filepaths given
	resultsLeft := results

	for v := range events {
		results[v.Index] = v.Stats

		if v.Err != nil {
			ok = false
			// flush first, so lines already buffered come before the error
			bw.Flush()
			showError(v.Err)
			// stay in the current loop, in case this failure was keeping
			// previous successes from showing up
		}

		for len(resultsLeft) > 0 {
			if resultsLeft[0].result == resultPending {
				break
			}

			if err := showResult(bw, resultsLeft[0]); err != nil {
				// assume later stages/apps in a pipe had enough input
				return ok
			}
			resultsLeft = resultsLeft[1:]
		}

		// show leading results immediately, if any
		bw.Flush()
	}

	return ok
}

// showError writes the error message given as its own line on stderr
func showError(err error) {
	os.Stderr.WriteString(err.Error())
	os.Stderr.WriteString("\n")
}

// showResult shows a TSV line for results marked as successful, doing nothing
// when given other types of results; since bufio.Writer errors are sticky,
// the error from the last write also covers all the previous ones
func showResult(w *bufio.Writer, s stats) error {
	if s.result != resultSuccess {
		return nil
	}

	// columns must follow the same order as the names in var header
	columns := [...]counter{
		s.bytes, s.lines, s.lf, s.crlf, s.spaces,
		s.tabs, s.trailing, s.nulls, s.fulls, s.highs,
	}

	var buf [24]byte
	w.WriteString(s.name)
	for _, v := range columns {
		w.Write(strconv.AppendUint(append(buf[:0], '\t'), uint64(v), 10))
	}
	w.WriteByte('\t')
	w.WriteString(bomLegend[s.bom])
	return w.WriteByte('\n')
}

// findAllFiles can be given a mix of file/folder paths, finding all files
// recursively in folders, avoiding duplicates; problems are shown on stderr
// as they're found, and result in a false success value, while still letting
// all other paths be checked
func findAllFiles(paths []string) (files []string, success bool) {
	got := make(map[string]struct{})
	success = true

	for _, root := range paths {
		if _, ok := got[root]; ok {
			continue
		}
		got[root] = struct{}{}

		// a dash means standard input
		if root == `-` {
			files = append(files, root)
			continue
		}

		info, err := os.Stat(root)
		if os.IsNotExist(err) {
			// on windows, file-not-found messages may mention `CreateFile`,
			// even when trying to open files in read-only mode
			err = errors.New(`can't find file/folder named ` + root)
		}
		if err != nil {
			showError(err)
			success = false
			continue
		}

		if !info.IsDir() {
			files = append(files, root)
			continue
		}

		err = filepath.WalkDir(root, func(path string, entry fs.DirEntry, err error) error {
			// check errors first: the entry may be nil when there's an
			// error, and folder-reading errors come via a second call for
			// a path which was already seen
			if err != nil {
				return err
			}

			// the first call is for the root folder itself, which was
			// marked as seen before walking it, and must not be skipped
			if path == root {
				return nil
			}

			if _, ok := got[path]; ok {
				if entry.IsDir() {
					return fs.SkipDir
				}
				return nil
			}
			got[path] = struct{}{}

			if !entry.IsDir() {
				files = append(files, path)
			}
			return nil
		})

		// the walk stops at its first error, which is shown only here
		if err != nil {
			showError(err)
			success = false
		}
	}

	return files, success
}

// counter makes it easy to change the int-size of almost all counters
type counter uint64

// statResult constrains possible result-states/values in type stats
type statResult int

const (
	// resultPending is the default not-yet-ready result-status
	resultPending = statResult(0)

	// resultError means result should show as an error, instead of data
	resultError = statResult(1)

	// resultSuccess means a result's stats are ready to show
	resultSuccess = statResult(2)
)

// bomType is the type for the byte-order-mark enumeration
type bomType int

const (
	noBOM      = bomType(0)
	utf8BOM    = bomType(1)
	utf16leBOM = bomType(2)
	utf16beBOM = bomType(3)
	utf32leBOM = bomType(4)
	utf32beBOM = bomType(5)
)

// bomLegend has the string-equivalents of the bomType constants
var bomLegend = []string{
	``,
	`UTF-8`,
	`UTF-16 LE`,
	`UTF-16 BE`,
	`UTF-32 LE`,
	`UTF-32 BE`,
}

// stats has all the size-stats for some input, as well as a way to
// skip showing results, in case of an error such as `file not found`
type stats struct {
	// bytes counts all bytes read
	bytes counter

	// lines counts lines, and is 0 only when the byte-count is also 0
	lines counter

	// maxWidth is maximum byte-width of lines, excluding carriage-returns
	// and/or line-feeds
	maxWidth counter

	// nulls counts all-bits-off bytes
	nulls counter

	// fulls counts all-bits-on bytes
	fulls counter

	// highs counts bytes with their `top` (highest-order) bit on
	highs counter

	// spaces counts ASCII spaces
	spaces counter

	// tabs counts ASCII tabs
	tabs counter

	// trailing counts lines with trailing spaces in them
	trailing counter

	// lf counts ASCII line-feeds as their own byte-values: this means its
	// value will always be at least the same as field `crlf`
	lf counter

	// crlf counts ASCII CRLF byte-pairs
	crlf counter

	// the type of byte-order mark detected
	bom bomType

	// name is the filepath of the file/source these stats are about
	name string

	// result keeps track of whether results are valid and/or ready
	result statResult
}

// updateStats does what it says, reading everything from a reader; it also
// marks the result as either a success or an error, and never returns
// io.EOF as an error
func (res *stats) updateStats(r io.Reader) error {
	err := res.updateUsing(r)
	if err == io.EOF {
		err = nil
	}

	if err == nil {
		res.result = resultSuccess
	} else {
		res.result = resultError
	}
	return err
}

// checkBOM finds which byte-order-mark the data given start with, if any;
// UTF-32 LE is checked before UTF-16 LE, since the latter's mark is a prefix
// of the former's
func checkBOM(data []byte) bomType {
	d := data
	l := len(data)

	if l >= 3 && d[0] == 0xef && d[1] == 0xbb && d[2] == 0xbf {
		return utf8BOM
	}
	if l >= 4 && d[0] == 0xff && d[1] == 0xfe && d[2] == 0 && d[3] == 0 {
		return utf32leBOM
	}
	if l >= 4 && d[0] == 0 && d[1] == 0 && d[2] == 0xfe && d[3] == 0xff {
		return utf32beBOM
	}
	if l >= 2 && d[0] == 0xff && d[1] == 0xfe {
		return utf16leBOM
	}
	if l >= 2 && d[0] == 0xfe && d[1] == 0xff {
		return utf16beBOM
	}
	return noBOM
}

// updateUsing helps func updateStats do its job: it reads until the reader
// ends or fails, returning nil on a proper end of input, the reader's error
// otherwise, or io.ErrNoProgress when the reader keeps giving no data and
// no error
func (res *stats) updateUsing(r io.Reader) error {
	var buf [32 * 1024]byte
	var tallies [256]uint64
	var width counter
	var prev1, prev2 byte

	// head remembers the first few input bytes, since early reads may be
	// too short to tell which byte-order mark the data start with
	var head [4]byte
	headLen := 0
	emptyReads := 0

	for {
		n, err := r.Read(buf[:])
		if n < 1 {
			if err == nil {
				// no bytes and no error means nothing happened, not that
				// the input is over, so just try again, up to a limit
				emptyReads++
				if emptyReads < maxEmptyReads {
					continue
				}
				err = io.ErrNoProgress
			}

			res.lines = counter(tallies['\n'])
			res.tabs = counter(tallies['\t'])
			res.spaces = counter(tallies[' '])
			res.lf = counter(tallies['\n'])
			res.nulls = counter(tallies[0])
			res.fulls = counter(tallies[255])
			for i := 128; i < len(tallies); i++ {
				res.highs += counter(tallies[i])
			}

			if err == io.EOF {
				return res.handleEnd(width, prev1, prev2)
			}
			return err
		}
		emptyReads = 0

		chunk := buf[:n]
		if headLen < len(head) {
			headLen += copy(head[headLen:], chunk)
			res.bom = checkBOM(head[:headLen])
		}
		res.bytes += counter(n)

		for _, b := range chunk {
			// count values without branching, because it's fun
			tallies[b]++

			if b != '\n' {
				prev2 = prev1
				prev1 = b
				width++
				continue
			}

			// handle line-feeds
			crlf := count(prev1, '\r')
			res.crlf += crlf

			// count lines with trailing spaces, whether these end with
			// a CRLF byte-pair or just a line-feed byte
			if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
				res.trailing++
			}

			// exclude any CR from the current line's width-count
			width -= crlf
			if res.maxWidth < width {
				res.maxWidth = width
			}

			prev2 = prev1
			prev1 = b
			width = 0
		}
	}
}

// handleEnd fixes/finalizes stats when input data end; this func is only
// meant to be used by func updateUsing, since it takes some of the latter's
// local variables
func (res *stats) handleEnd(width counter, prev1, prev2 byte) error {
	// a last line without a line-feed can still have trailing spaces
	if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
		res.trailing++
	}

	if res.maxWidth < width {
		res.maxWidth = width
	}

	// avoid reporting 0 lines with a non-0 byte-count: this is unlike the
	// standard cmd-line tool `wc`
	if res.bytes > 0 && prev1 != '\n' {
		res.lines++
	}
	return nil
}

// count checks if 2 bytes are the same, returning either 0 or 1, which can
// be added directly/branchlessly to totals
func count(x, y byte) counter {
	if x == y {
		return 1
	}
	return 0
}