File: coby/info.txt
   1 coby [files/folders...]
   2 
   3 COunt BYtes finds out some simple byte-related stats, counting
   4 
   5     - bytes and runes (UTF-8 sequences)
   6     - lines, line-feeds, and how many lines end with a CRLF pair
   7     - spaces and tabs
   8     - how many lines have trailing spaces
   9     - all-off (0) bytes
  10     - all-on (255) bytes
  11     - high-bytes (128+)
  12 
  13 The output is TSV (tab-separated values) lines, where the first line has
  14 all the column names.
  15 
  16 When no filepaths are given, the standard input is used by default. All
  17 folder names given expand recursively into all filenames in them.

     File: coby/main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "io/fs"
   8     "os"
   9     "path/filepath"
  10     "runtime"
  11     "strconv"
  12     "sync"
  13 
  14     _ "embed"
  15 )
  16 
  17 // Note: the code is avoiding using the fmt package to save hundreds of
  18 // kilobytes on the resulting executable, which is a noticeable difference.
  19 
// info is the help message, which func main shows when the first cmd-line
// argument is one of the help options; its text is embedded from the file
// named in the directive below
//
//go:embed info.txt
var info string
  22 
// header has the column names for the first output line, which func main
// emits as tab-separated items; the names are in the same order func
// showResult emits the values of each result line
var header = []string{
	`name`,
	`bytes`,
	`runes`,
	`lines`,
	`lf`,
	`crlf`,
	`spaces`,
	`tabs`,
	`trails`,
	`nulls`,
	`fulls`,
	`highs`,
}
  38 
// event has what the output-reporting task needs to show the results of a
// task which has just completed, perhaps unsuccessfully: worker tasks send
// one of these for each input they handle
type event struct {
	// Index points to the task's entry in the results-slice, which is also
	// the position of the task's input among all the names to handle
	Index int

	// Stats has all the byte-related stats, along with the result-status
	Stats stats

	// Err is the completed task's error, or nil when the task succeeded
	Err error
}
  51 
  52 func main() {
  53     if len(os.Args) > 1 {
  54         switch os.Args[1] {
  55         case `-h`, `--h`, `-help`, `--help`:
  56             os.Stderr.WriteString(info)
  57             return
  58         }
  59     }
  60 
  61     // show first/heading line right away, to let users know things are
  62     // happening
  63     for i, s := range header {
  64         if i > 0 {
  65             os.Stdout.WriteString("\t")
  66         }
  67         os.Stdout.WriteString(s)
  68     }
  69     // assume an error means later stages/apps in a pipe had enough input and
  70     // quit successfully, so quit successfully too
  71     _, err := os.Stdout.WriteString("\n")
  72     if err != nil {
  73         return
  74     }
  75 
  76     // names has all filepaths given, ignoring repetitions
  77     names, ok := findAllFiles(deduplicate(os.Args[1:]))
  78     if !ok {
  79         os.Exit(1)
  80     }
  81     names = deduplicate(names)
  82     if len(names) == 0 {
  83         names = []string{`-`}
  84     }
  85 
  86     events := make(chan event)
  87     go handleInputs(names, events)
  88     if !handleOutput(os.Stdout, len(names), events) {
  89         os.Exit(1)
  90     }
  91 }
  92 
  93 // handleInputs launches all the tasks which do the actual work, limiting how
  94 // many inputs are being worked on at the same time
  95 func handleInputs(names []string, events chan event) {
  96     // allow output-reporter task to end, and thus the app
  97     defer close(events)
  98 
  99     // permissions limits how many worker tasks can be active at the same
 100     // time: when given many filepaths to work on, rate-limiting avoids
 101     // a massive number of concurrent tasks which read and process input
 102     permissions := make(chan struct{}, runtime.NumCPU())
 103     defer close(permissions)
 104 
 105     var inputs sync.WaitGroup
 106     for i := range names {
 107         // wait until some concurrency-room is available
 108         permissions <- struct{}{}
 109         inputs.Add(1)
 110 
 111         go func(i int) {
 112             defer inputs.Done()
 113             res, err := handleInput(names[i])
 114             events <- event{i, res, err}
 115             <-permissions
 116         }(i)
 117     }
 118 
 119     // wait for all inputs, before closing the `events` channel
 120     inputs.Wait()
 121 }
 122 
 123 // handleInput handles each work-item for func handleInputs
 124 func handleInput(path string) (stats, error) {
 125     var res stats
 126     res.name = path
 127 
 128     if path == `-` {
 129         err := res.updateStats(os.Stdin)
 130         return res, err
 131     }
 132 
 133     f, err := os.Open(path)
 134     if err != nil {
 135         res.result = resultError
 136         // on windows, file-not-found error messages may mention `CreateFile`,
 137         // even when trying to open files in read-only mode
 138         return res, errors.New(`can't open file named ` + path)
 139     }
 140     defer f.Close()
 141 
 142     err = res.updateStats(f)
 143     return res, err
 144 }
 145 
 146 // handleOutput asynchronously updates output as results are known, whether
 147 // it's errors or successful results; returns whether it succeeded, which
 148 // means no errors happened
 149 func handleOutput(w io.Writer, rescount int, events chan event) (ok bool) {
 150     ok = true
 151     bw := bufio.NewWriter(w)
 152     defer bw.Flush()
 153 
 154     results := make([]stats, rescount)
 155 
 156     // keep track of which tasks are over, so that on each event all leading
 157     // results which are ready are shown: all of this ensures prompt output
 158     // updates as soon as results come in, while keeping the original order
 159     // of the names/filepaths given
 160     resultsLeft := results
 161 
 162     for v := range events {
 163         results[v.Index] = v.Stats
 164         if v.Err != nil {
 165             ok = false
 166             bw.Flush()
 167             showError(v.Err)
 168 
 169             // stay in the current loop, in case this failure was keeping
 170             // previous successes from showing up
 171         }
 172 
 173         n := countLeadingReady(resultsLeft)
 174 
 175         for _, res := range resultsLeft[:n] {
 176             if err := showResult(bw, res); err != nil {
 177                 // assume later stages/apps in a pipe had enough input and
 178                 // quit successfully, so quit successfully too
 179                 return true
 180             }
 181         }
 182         resultsLeft = resultsLeft[n:]
 183 
 184         // flush output-buffer only if anything new was shown
 185         if n > 0 {
 186             bw.Flush()
 187         }
 188     }
 189 
 190     return ok
 191 }
 192 
 193 // showError standardizes how errors from this app look
 194 func showError(err error) {
 195     os.Stderr.WriteString("\x1b[31m")
 196     os.Stderr.WriteString(err.Error())
 197     os.Stderr.WriteString("\x1b[0m\n")
 198 }
 199 
 200 // showResult does what it says
 201 func showResult(w *bufio.Writer, res stats) error {
 202     if res.result == resultError {
 203         return nil
 204     }
 205 
 206     var buf [64]byte
 207     w.WriteString(res.name)
 208     w.Write([]byte{'\t'})
 209     w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10))
 210     w.Write([]byte{'\t'})
 211     w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10))
 212     w.Write([]byte{'\t'})
 213     w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10))
 214     w.Write([]byte{'\t'})
 215     w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10))
 216     w.Write([]byte{'\t'})
 217     w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10))
 218     w.Write([]byte{'\t'})
 219     w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10))
 220     w.Write([]byte{'\t'})
 221     w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10))
 222     w.Write([]byte{'\t'})
 223     w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10))
 224     w.Write([]byte{'\t'})
 225     w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10))
 226     w.Write([]byte{'\t'})
 227     w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10))
 228     w.Write([]byte{'\t'})
 229     w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10))
 230     _, err := w.Write([]byte{'\n'})
 231     return err
 232 }
 233 
 234 // deduplicate ensures items only appear once, keeping the original slice
 235 // unchanged
 236 func deduplicate(src []string) []string {
 237     var unique []string
 238     got := make(map[string]struct{})
 239 
 240     for _, s := range src {
 241         if _, ok := got[s]; ok {
 242             continue
 243         }
 244 
 245         unique = append(unique, s)
 246         got[s] = struct{}{}
 247     }
 248 
 249     return unique
 250 }
 251 
 252 // findAllFiles does what it says, given a mix of file/folder paths, finding
 253 // all files recursively in the case of folders
 254 func findAllFiles(paths []string) (found []string, ok bool) {
 255     var unique []string
 256     got := make(map[string]struct{})
 257     ok = true
 258 
 259     for _, root := range paths {
 260         // a dash means standard input
 261         if root == `-` {
 262             if _, ok := got[root]; ok {
 263                 continue
 264             }
 265 
 266             unique = append(unique, root)
 267             got[root] = struct{}{}
 268             continue
 269         }
 270 
 271         _, err := os.Stat(root)
 272         if os.IsNotExist(err) {
 273             ok = false
 274             // on windows, file-not-found error messages may mention `CreateFile`,
 275             // even when trying to open files in read-only mode
 276             err := errors.New(`can't find file/folder named ` + root)
 277             showError(err)
 278             continue
 279         }
 280 
 281         err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
 282             if err != nil {
 283                 return err
 284             }
 285 
 286             if d.IsDir() {
 287                 return nil
 288             }
 289 
 290             if _, ok := got[path]; ok {
 291                 return nil
 292             }
 293 
 294             unique = append(unique, path)
 295             got[path] = struct{}{}
 296             return nil
 297         })
 298 
 299         if err != nil {
 300             ok = false
 301             showError(err)
 302         }
 303     }
 304 
 305     return unique, ok
 306 }

     File: coby/mit-license.txt
   1 The MIT License (MIT)
   2 
   3 Copyright © 2025 pacman64
   4 
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the “Software”), to deal
   7 in the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11 
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14 
  15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.

     File: coby/stats.go
   1 package main
   2 
   3 import (
   4     "io"
   5 )
   6 
// isZero enables branchless-counting, when xor-compared bytes are used
// as indices for it: only its first item is 1, and xor-ing 2 bytes gives 0
// only when they're the same; only the commented-out variant of func count
// in this file refers to it
var isZero = [256]byte{1}
  10 
// counter makes it easy to change the int-size of almost all counters: the
// exception is the byte-count in type stats, which is a plain int
type counter int
  13 
// statResult constrains possible result-states/values in type stats
type statResult int

const (
	// resultPending is the default not-yet-ready result-status, being the
	// zero value of its type
	resultPending = statResult(0)

	// resultError signals result should show as an error, instead of data
	resultError = statResult(1)

	// resultSuccess means result can be shown
	resultSuccess = statResult(2)
)
  27 
// stats has all the size-stats for some input, as well as a way to
// skip showing results, in case of an error such as `file not found`
type stats struct {
	// bytes counts all bytes read
	bytes int

	// lines counts lines, and is 0 only when the byte-count is also 0: a
	// last line which doesn't end with a line-feed is counted too
	lines counter

	// runes counts utf-8 sequences, each of which can use up to 4 bytes and
	// is usually a complete symbol: `emoji` country-flags are commonly-used
	// counter-examples, as each of these `symbols` needs 2 runes of 4 bytes
	// each, which means 8 bytes in total
	runes counter

	// maxWidth is maximum byte-width of lines, excluding carriage-returns
	// and/or line-feeds
	maxWidth counter

	// nulls counts all-bits-off bytes
	nulls counter

	// fulls counts all-bits-on bytes
	fulls counter

	// highs counts bytes with their `top` (highest-order) bit on
	highs counter

	// spaces counts ASCII spaces
	spaces counter

	// tabs counts ASCII tabs
	tabs counter

	// trailing counts lines with trailing spaces in them
	trailing counter

	// lf counts ASCII line-feeds as their own byte-values: this means its
	// value will always be at least the same as field `crlf`
	lf counter

	// crlf counts ASCII CRLF byte-pairs
	crlf counter

	// name is the filepath of the file/source these stats are about
	name string

	// result keeps track of whether results are valid and/or ready
	result statResult
}
  77 
  78 // updateStats does what it says, reading everything from a reader
  79 func (res *stats) updateStats(r io.Reader) error {
  80     err := res.updateUsing(r)
  81     if err == io.EOF {
  82         err = nil
  83     }
  84 
  85     if err == nil {
  86         res.result = resultSuccess
  87     } else {
  88         res.result = resultError
  89     }
  90     return err
  91 }
  92 
  93 // updateUsing helps func updateStats do its job
  94 func (res *stats) updateUsing(r io.Reader) error {
  95     var width counter
  96     var prev1, prev2 byte
  97     var buf [16 * 1024]byte
  98     var tallies [256]uint64
  99 
 100     for {
 101         n, err := r.Read(buf[:])
 102         if n < 1 {
 103             if err == io.EOF {
 104                 res.lines = counter(tallies['\n'])
 105                 res.tabs = counter(tallies['\t'])
 106                 res.spaces = counter(tallies[' '])
 107                 res.lf = counter(tallies['\n'])
 108                 res.nulls = counter(tallies[0])
 109                 res.fulls = counter(tallies[255])
 110                 for i := 128; i < 256; i++ {
 111                     res.highs += counter(tallies[i])
 112                 }
 113                 return res.handleEnd(width, prev1, prev2)
 114             }
 115             return err
 116         }
 117 
 118         res.bytes += n
 119         chunk := buf[:n]
 120 
 121         for _, b := range chunk {
 122             // count values without branching, because it's fun
 123             tallies[b]++
 124 
 125             // handle non-ASCII runes, assuming input is valid UTF-8
 126             res.runes += 1 - count(b&0xc0, 0x80)
 127 
 128             // handle line-feeds
 129             if b == '\n' {
 130                 crlf := count(prev1, '\r')
 131                 res.crlf += crlf
 132 
 133                 // count lines with trailing spaces, whether these end with
 134                 // a CRLF byte-pair or just a line-feed byte
 135                 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
 136                     res.trailing++
 137                 }
 138 
 139                 // exclude any CR from the current line's width-count
 140                 width -= crlf
 141                 if res.maxWidth < width {
 142                     res.maxWidth = width
 143                 }
 144 
 145                 prev2 = prev1
 146                 prev1 = b
 147                 width = 0
 148                 continue
 149             }
 150 
 151             prev2 = prev1
 152             prev1 = b
 153             width++
 154         }
 155     }
 156 }
 157 
 158 // handleEnd fixes/finalizes stats when input data end; this func is only
 159 // meant to be used by func updateStats, since it takes some of the latter's
 160 // local variables
 161 func (res *stats) handleEnd(width counter, prev1, prev2 byte) error {
 162     if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') {
 163         res.trailing++
 164     }
 165 
 166     if res.maxWidth < width {
 167         res.maxWidth = width
 168     }
 169 
 170     // avoid reporting 0 lines with a non-0 byte-count: this is unlike the
 171     // standard cmd-line tool `wc`
 172     if res.bytes > 0 && prev1 != '\n' {
 173         res.lines++
 174     }
 175 
 176     return nil
 177 }
 178 
 179 // count checks if 2 bytes are the same, returning either 0 or 1, which can
 180 // be added directly/branchlessly to totals
 181 // func count(x, y byte) counter {
 182 //  return counter(isZero[x^y])
 183 // }
 184 
 185 // count checks if 2 bytes are the same, returning either 0 or 1, which can
 186 // be added directly/branchlessly to totals
 187 func count(x, y byte) counter {
 188     if x != y {
 189         return 0
 190     }
 191     return 1
 192 }
 193 
 194 // countLeadingReady finds how many items are ready to show at the start of a
 195 // results-slice, which ensures output matches the original item-order
 196 func countLeadingReady(values []stats) int {
 197     for i, v := range values {
 198         if v.result == resultPending {
 199             return i
 200         }
 201     }
 202     return len(values)
 203 }

     File: coby/stats_test.go
   1 package main
   2 
   3 import (
   4     "strings"
   5     "testing"
   6 )
   7 
   8 func TestCount(t *testing.T) {
   9     for x := 0; x < 256; x++ {
  10         for y := 0; y < 256; y++ {
  11             var exp counter
  12             if x == y {
  13                 exp = 1
  14             }
  15 
  16             if got := count(byte(x), byte(y)); got != exp {
  17                 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got)
  18                 return
  19             }
  20         }
  21     }
  22 }
  23 
  24 func TestCountLeadingReady(t *testing.T) {
  25     for size := 0; size <= 20; size++ {
  26         for exp := 0; exp < size; exp++ {
  27             values := make([]stats, size)
  28             for i := 0; i < exp; i++ {
  29                 v := resultSuccess
  30                 if i%2 == 1 {
  31                     v = resultError
  32                 }
  33                 values[i].result = v
  34             }
  35 
  36             if got := countLeadingReady(values); got != exp {
  37                 const fs = `size %d: expected %d, instead of %d`
  38                 t.Fatalf(fs, size, exp, got)
  39             }
  40         }
  41     }
  42 }
  43 
  44 func TestStats(t *testing.T) {
  45     var tests = []struct {
  46         Input    string
  47         Expected stats
  48     }{
  49         {
  50             ``,
  51             stats{},
  52         },
  53         {
  54             `abc`,
  55             stats{lines: 1, runes: 3, maxWidth: 3},
  56         },
  57         {
  58             "abc\tdef\r\n",
  59             stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  60         },
  61         {
  62             "abc\tdef\r\n",
  63             stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1},
  64         },
  65         {
  66             "abc\tdef \r\n123\t456  789 ",
  67             stats{
  68                 lines: 2, runes: 23, maxWidth: 13,
  69                 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1,
  70             },
  71         },
  72     }
  73 
  74     for _, tc := range tests {
  75         t.Run(tc.Input, func(t *testing.T) {
  76             var got stats
  77             err := got.updateStats(strings.NewReader(tc.Input))
  78             if err != nil {
  79                 t.Error(err)
  80                 return
  81             }
  82 
  83             tc.Expected.bytes = len(tc.Input)
  84             tc.Expected.result = resultSuccess
  85             if got != tc.Expected {
  86                 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got)
  87                 return
  88             }
  89         })
  90     }
  91 }