File: ./info.txt
   1 coby [files/folders...]
   2 
   3 COunt BYtes finds out some simple byte-related stats, counting
   4 
    - bytes
    - runes (UTF-8 sequences)
    - lines
    - line-feed bytes
    - spaces
    - tabs
    - how many lines have trailing spaces
    - how many lines end with a CRLF pair
    - all-off (0) bytes
    - all-on (255) bytes
    - high-bytes (128+)
  12 
  13 The output is TSV (tab-separated values) lines, where the first line has
  14 all the column names.
  15 
  16 When no filepaths are given, the standard input is used by default. All
  17 folder names given expand recursively into all filenames in them.
     File: ./main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "io/fs"
   8     "os"
   9     "path/filepath"
  10     "runtime"
  11     "strconv"
  12     "sync"
  13 
  14     _ "embed"
  15 )
  16 
// info is the help message, embedded from file info.txt at build time; func
// main writes it to standard error when the first cmd-line argument is one
// of the help options
//
//go:embed info.txt
var info string
  19 
  20 // header is the first output line
  21 var header = []string{
  22     `name`,
  23     `bytes`,
  24     `runes`,
  25     `lines`,
  26     `lf`,
  27     `crlf`,
  28     `spaces`,
  29     `tabs`,
  30     `trails`,
  31     `nulls`,
  32     `fulls`,
  33     `highs`,
  34 }
  35 
// event is what a worker task sends to the output-reporting task when it's
// done with its input, whether it succeeded or not
type event struct {
	// Index is the position of the task's entry in the results-slice, which
	// follows the order of the names/filepaths given
	Index int

	// Stats has all the byte-related stats the task came up with
	Stats stats

	// Err is the completed task's error, or nil when the task succeeded
	Err error
}
  48 
  49 func main() {
  50     if len(os.Args) > 1 {
  51         switch os.Args[1] {
  52         case `-h`, `--h`, `-help`, `--help`:
  53             os.Stderr.WriteString(info)
  54             return
  55         }
  56     }
  57 
  58     // show first/heading line right away, to let users know things are
  59     // happening
  60     for i, s := range header {
  61         if i > 0 {
  62             os.Stdout.WriteString("\t")
  63         }
  64         os.Stdout.WriteString(s)
  65     }
  66     // assume an error means later stages/apps in a pipe had enough input and
  67     // quit successfully, so quit successfully too
  68     _, err := os.Stdout.WriteString("\n")
  69     if err != nil {
  70         return
  71     }
  72 
  73     // names has all filepaths given, ignoring repetitions
  74     names, ok := findAllFiles(deduplicate(os.Args[1:]))
  75     if !ok {
  76         os.Exit(1)
  77     }
  78     names = deduplicate(names)
  79     if len(names) == 0 {
  80         names = []string{`-`}
  81     }
  82 
  83     events := make(chan event)
  84     go handleInputs(names, events)
  85     if !handleOutput(os.Stdout, len(names), events) {
  86         os.Exit(1)
  87     }
  88 }
  89 
  90 // handleInputs launches all the tasks which do the actual work, limiting how
  91 // many inputs are being worked on at the same time
  92 func handleInputs(names []string, events chan event) {
  93     // allow output-reporter task to end, and thus the app
  94     defer close(events)
  95 
  96     // permissions limits how many worker tasks can be active at the same
  97     // time: when given many filepaths to work on, rate-limiting avoids
  98     // a massive number of concurrent tasks which read and process input
  99     permissions := make(chan struct{}, runtime.NumCPU())
 100     defer close(permissions)
 101 
 102     var inputs sync.WaitGroup
 103     for i := range names {
 104         // wait until some concurrency-room is available
 105         permissions <- struct{}{}
 106         inputs.Add(1)
 107 
 108         go func(i int) {
 109             defer inputs.Done()
 110             res, err := handleInput(names[i])
 111             events <- event{i, res, err}
 112             <-permissions
 113         }(i)
 114     }
 115 
 116     // wait for all inputs, before closing the `events` channel
 117     inputs.Wait()
 118 }
 119 
 120 // handleInput handles each work-item for func handleInputs
 121 func handleInput(path string) (stats, error) {
 122     var res stats
 123     res.name = path
 124 
 125     if path == `-` {
 126         err := res.updateStats(os.Stdin)
 127         return res, err
 128     }
 129 
 130     f, err := os.Open(path)
 131     if err != nil {
 132         res.result = resultError
 133         // on windows, file-not-found error messages may mention `CreateFile`,
 134         // even when trying to open files in read-only mode
 135         return res, errors.New(`can't open file named ` + path)
 136     }
 137     defer f.Close()
 138 
 139     err = res.updateStats(f)
 140     return res, err
 141 }
 142 
 143 // handleOutput asynchronously updates output as results are known, whether
 144 // it's errors or successful results; returns whether it succeeded, which
 145 // means no errors happened
 146 func handleOutput(w io.Writer, rescount int, events chan event) (ok bool) {
 147     ok = true
 148     bw := bufio.NewWriter(w)
 149     defer bw.Flush()
 150 
 151     results := make([]stats, rescount)
 152 
 153     // keep track of which tasks are over, so that on each event all leading
 154     // results which are ready are shown: all of this ensures prompt output
 155     // updates as soon as results come in, while keeping the original order
 156     // of the names/filepaths given
 157     resultsLeft := results
 158 
 159     for v := range events {
 160         results[v.Index] = v.Stats
 161         if v.Err != nil {
 162             ok = false
 163             bw.Flush()
 164             showError(v.Err)
 165 
 166             // stay in the current loop, in case this failure was keeping
 167             // previous successes from showing up
 168         }
 169 
 170         n := countLeadingReady(resultsLeft)
 171 
 172         for _, res := range resultsLeft[:n] {
 173             if err := showResult(bw, res); err != nil {
 174                 // assume later stages/apps in a pipe had enough input and
 175                 // quit successfully, so quit successfully too
 176                 return true
 177             }
 178         }
 179         resultsLeft = resultsLeft[n:]
 180 
 181         // flush output-buffer only if anything new was shown
 182         if n > 0 {
 183             bw.Flush()
 184         }
 185     }
 186 
 187     return ok
 188 }
 189 
 190 // showError standardizes how errors from this app look
 191 func showError(err error) {
 192     os.Stderr.WriteString("\x1b[31m")
 193     os.Stderr.WriteString(err.Error())
 194     os.Stderr.WriteString("\x1b[0m\n")
 195 }
 196 
 197 // showResult does what it says
 198 func showResult(w *bufio.Writer, res stats) error {
 199     if res.result == resultError {
 200         return nil
 201     }
 202 
 203     var buf [64]byte
 204     w.WriteString(res.name)
 205     w.Write([]byte{'\t'})
 206     w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10))
 207     w.Write([]byte{'\t'})
 208     w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10))
 209     w.Write([]byte{'\t'})
 210     w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10))
 211     w.Write([]byte{'\t'})
 212     w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10))
 213     w.Write([]byte{'\t'})
 214     w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10))
 215     w.Write([]byte{'\t'})
 216     w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10))
 217     w.Write([]byte{'\t'})
 218     w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10))
 219     w.Write([]byte{'\t'})
 220     w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10))
 221     w.Write([]byte{'\t'})
 222     w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10))
 223     w.Write([]byte{'\t'})
 224     w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10))
 225     w.Write([]byte{'\t'})
 226     w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10))
 227     _, err := w.Write([]byte{'\n'})
 228     return err
 229 }
 230 
 231 // deduplicate ensures items only appear once, keeping the original slice
 232 // unchanged
 233 func deduplicate(src []string) []string {
 234     var unique []string
 235     got := make(map[string]struct{})
 236 
 237     for _, s := range src {
 238         if _, ok := got[s]; ok {
 239             continue
 240         }
 241 
 242         unique = append(unique, s)
 243         got[s] = struct{}{}
 244     }
 245 
 246     return unique
 247 }
 248 
 249 // findAllFiles does what it says, given a mix of file/folder paths, finding
 250 // all files recursively in the case of folders
 251 func findAllFiles(paths []string) (found []string, ok bool) {
 252     var unique []string
 253     got := make(map[string]struct{})
 254     ok = true
 255 
 256     for _, root := range paths {
 257         // a dash means standard input
 258         if root == `-` {
 259             if _, ok := got[root]; ok {
 260                 continue
 261             }
 262 
 263             unique = append(unique, root)
 264             got[root] = struct{}{}
 265             continue
 266         }
 267 
 268         _, err := os.Stat(root)
 269         if os.IsNotExist(err) {
 270             ok = false
 271             // on windows, file-not-found error messages may mention `CreateFile`,
 272             // even when trying to open files in read-only mode
 273             err := errors.New(`can't find file/folder named ` + root)
 274             showError(err)
 275             continue
 276         }
 277 
 278         err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
 279             if err != nil {
 280                 return err
 281             }
 282 
 283             if d.IsDir() {
 284                 return nil
 285             }
 286 
 287             if _, ok := got[path]; ok {
 288                 return nil
 289             }
 290 
 291             unique = append(unique, path)
 292             got[path] = struct{}{}
 293             return nil
 294         })
 295 
 296         if err != nil {
 297             ok = false
 298             showError(err)
 299         }
 300     }
 301 
 302     return unique, ok
 303 }
     File: ./mit-license.txt
   1 The MIT License (MIT)
   2 
   3 Copyright (c) 2026 pacman64
   4 
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the "Software"), to deal
   7 in the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11 
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14 
  15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.
     File: ./stats.go
   1 package main
   2 
   3 import (
   4     "io"
   5 )
   6 
   7 // isZero enables branchless-counting, when xor-compared bytes are used
   8 // as indices for it
   9 var isZero = [256]byte{1}
  10 
  11 // counter makes it easy to change the int-size of almost all counters
  12 type counter int
  13 
  14 // statResult constrains possible result-states/values in type stats
  15 type statResult int
  16 
  17 const (
  18     // resultPending is the default not-yet-ready result-status
  19     resultPending = statResult(0)
  20 
  21     // resultError signals result should show as an error, instead of data
  22     resultError = statResult(1)
  23 
  24     // resultSuccess means result can be shown
  25     resultSuccess = statResult(2)
  26 )
  27 
// stats has all the byte-related stats for a single input, along with the
// input's name and a status which tells whether the stats are ready to show,
// or must be skipped because of an error, such as `file not found`
type stats struct {
	// bytes counts all bytes read
	bytes int

	// lines counts lines, and is 0 only when the byte-count is also 0
	lines counter

	// runes counts utf-8 sequences, each of which can use up to 4 bytes and
	// is usually a complete symbol: `emoji` country-flags are commonly-used
	// counter-examples, as each of these `symbols` needs 2 runes, which take
	// 8 bytes in total
	runes counter

	// maxWidth is the maximum byte-width of lines, excluding carriage-returns
	// and/or line-feeds
	maxWidth counter

	// nulls counts all-bits-off bytes
	nulls counter

	// fulls counts all-bits-on bytes
	fulls counter

	// highs counts bytes with their `top` (highest-order) bit on
	highs counter

	// spaces counts ASCII spaces
	spaces counter

	// tabs counts ASCII tabs
	tabs counter

	// trailing counts lines with trailing spaces in them
	trailing counter

	// lf counts ASCII line-feeds as their own byte-values: this means its
	// value will always be at least the same as field `crlf`
	lf counter

	// crlf counts ASCII CRLF byte-pairs
	crlf counter

	// name is the filepath of the file/source these stats are about
	name string

	// result keeps track of whether the stats are pending, failed, or ready
	result statResult
}
  77 
  78 // updateStats does what it says, reading everything from a reader
  79 func (res *stats) updateStats(r io.Reader) error {
  80     err := res.updateUsing(r)
  81     if err == io.EOF {
  82         err = nil
  83     }
  84 
  85     if err == nil {
  86         res.result = resultSuccess
  87     } else {
  88         res.result = resultError
  89     }
  90     return err
  91 }
  92 
  93 // updateUsing helps func updateStats do its job
  94 func (res *stats) updateUsing(r io.Reader) error {
  95     var width counter
  96     var prev1, prev2 byte
  97     var buf [16 * 1024]byte
  98     var tallies [256]uint64
  99 
 100     for {
 101         n, err := r.Read(buf[:])
 102         if n < 1 {
 103             if err == io.EOF {
 104                 res.lines = counter(tallies['\n'])
 105                 res.tabs = counter(tallies['\t'])
 106                 res.spaces = counter(tallies[' '])
 107                 res.lf = counter(tallies['\n'])
 108                 res.nulls = counter(tallies[0])
 109                 res.fulls = counter(tallies[255])
 110                 for i := 128; i < 256; i++ {
 111                     res.highs += counter(tallies[i])
 112                 }
 113                 return res.handleEnd(width, prev1, prev2)
 114             }
 115             return err
 116         }
 117 
 118         res.bytes += n
 119         chunk := buf[:n]
 120 
 121         for _, b := range chunk {
 122             // count values without branching, because it's fun
 123             tallies[b]++
 124 
 125             // handle non-ASCII runes, assuming input is valid UTF-8
 126             res.runes += 1 - count(b&0xc0, 0x80)
 127 
 128             // handle line-feeds
 129             if b == '\n' {
 130                 crlf := count(prev1, '\r')
 131                 res.crlf += crlf
 132 
 133                 // count lines with trailing spaces, whether these end with
 134                 // a CRLF byte-pair or just a line-feed byte
 135                 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
 136                     res.trailing++
 137                 }
 138 
 139                 // exclude any CR from the current line's width-count
 140                 width -= crlf
 141                 if res.maxWidth < width {
 142                     res.maxWidth = width
 143                 }
 144 
 145                 prev2 = prev1
 146                 prev1 = b
 147                 width = 0
 148                 continue
 149             }
 150 
 151             prev2 = prev1
 152             prev1 = b
 153             width++
 154         }
 155     }
 156 }
 157 
 158 // handleEnd fixes/finalizes stats when input data end; this func is only
 159 // meant to be used by func updateStats, since it takes some of the latter's
 160 // local variables
 161 func (res *stats) handleEnd(width counter, prev1, prev2 byte) error {
 162     if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
 163         res.trailing++
 164     }
 165 
 166     if res.maxWidth < width {
 167         res.maxWidth = width
 168     }
 169 
 170     // avoid reporting 0 lines with a non-0 byte-count: this is unlike the
 171     // standard cmd-line tool `wc`
 172     if res.bytes > 0 && prev1 != '\n' {
 173         res.lines++
 174     }
 175 
 176     return nil
 177 }
 178 
 179 // count checks if 2 bytes are the same, returning either 0 or 1, which can
 180 // be added directly/branchlessly to totals
 181 // func count(x, y byte) counter {
 182 //  return counter(isZero[x^y])
 183 // }
 184 
 185 // count checks if 2 bytes are the same, returning either 0 or 1, which can
 186 // be added directly/branchlessly to totals
 187 func count(x, y byte) counter {
 188     if x != y {
 189         return 0
 190     }
 191     return 1
 192 }
 193 
 194 // countLeadingReady finds how many items are ready to show at the start of a
 195 // results-slice, which ensures output matches the original item-order
 196 func countLeadingReady(values []stats) int {
 197     for i, v := range values {
 198         if v.result == resultPending {
 199             return i
 200         }
 201     }
 202     return len(values)
 203 }
     File: ./stats_test.go
   1 package main
   2 
   3 import (
   4     "strings"
   5     "testing"
   6 )
   7 
   8 func TestCount(t *testing.T) {
   9     for x := 0; x < 256; x++ {
  10         for y := 0; y < 256; y++ {
  11             var exp counter
  12             if x == y {
  13                 exp = 1
  14             }
  15 
  16             if got := count(byte(x), byte(y)); got != exp {
  17                 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got)
  18                 return
  19             }
  20         }
  21     }
  22 }
  23 
  24 func TestCountLeadingReady(t *testing.T) {
  25     for size := 0; size <= 20; size++ {
  26         for exp := 0; exp < size; exp++ {
  27             values := make([]stats, size)
  28             for i := 0; i < exp; i++ {
  29                 v := resultSuccess
  30                 if i%2 == 1 {
  31                     v = resultError
  32                 }
  33                 values[i].result = v
  34             }
  35 
  36             if got := countLeadingReady(values); got != exp {
  37                 const fs = `size %d: expected %d, instead of %d`
  38                 t.Fatalf(fs, size, exp, got)
  39             }
  40         }
  41     }
  42 }
  43 
  44 func TestStats(t *testing.T) {
  45     var tests = []struct {
  46         Input    string
  47         Expected stats
  48     }{
  49         {
  50             ``,
  51             stats{},
  52         },
  53         {
  54             `abc`,
  55             stats{lines: 1, runes: 3, maxWidth: 3},
  56         },
  57         {
  58             "abc\tdef\r\n",
  59             stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  60         },
  61         {
  62             "abc\tdef\r\n",
  63             stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  64         },
  65         {
  66             "abc\tdef \r\n123\t456  789 ",
  67             stats{
  68                 lines: 2, runes: 23, maxWidth: 13,
  69                 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1,
  70             },
  71         },
  72     }
  73 
  74     for _, tc := range tests {
  75         t.Run(tc.Input, func(t *testing.T) {
  76             var got stats
  77             err := got.updateStats(strings.NewReader(tc.Input))
  78             if err != nil {
  79                 t.Error(err)
  80                 return
  81             }
  82 
  83             tc.Expected.bytes = len(tc.Input)
  84             tc.Expected.result = resultSuccess
  85             if got != tc.Expected {
  86                 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got)
  87                 return
  88             }
  89         })
  90     }
  91 }