File: ./info.txt 1 coby [files/folders...] 2 3 COunt BYtes finds out some simple byte-related stats, counting 4 5 - bytes 6 - lines 7 - how many lines have trailing spaces 8 - how many lines end with a CRLF pair 9 - all-off (0) bytes 10 - all-on (255) bytes 11 - high-bytes (128+) 12 13 The output is TSV (tab-separated values) lines, where the first line has 14 all the column names. 15 16 When no filepaths are given, the standard input is used by default. All 17 folder names given expand recursively into all filenames in them. File: ./main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strconv" 12 "sync" 13 14 _ "embed" 15 ) 16 17 //go:embed info.txt 18 var info string 19 20 // header is the first output line 21 var header = []string{ 22 `name`, 23 `bytes`, 24 `runes`, 25 `lines`, 26 `lf`, 27 `crlf`, 28 `spaces`, 29 `tabs`, 30 `trails`, 31 `nulls`, 32 `fulls`, 33 `highs`, 34 } 35 36 // event has what the output-reporting task needs to show the results of a 37 // task which has just completed, perhaps unsuccessfully 38 type event struct { 39 // Index points to the task's entry in the results-slice 40 Index int 41 42 // Stats has all the byte-related stats 43 Stats stats 44 45 // Err is the completed task's error, or lack of 46 Err error 47 } 48 49 func main() { 50 if len(os.Args) > 1 { 51 switch os.Args[1] { 52 case `-h`, `--h`, `-help`, `--help`: 53 os.Stderr.WriteString(info) 54 return 55 } 56 } 57 58 // show first/heading line right away, to let users know things are 59 // happening 60 for i, s := range header { 61 if i > 0 { 62 os.Stdout.WriteString("\t") 63 } 64 os.Stdout.WriteString(s) 65 } 66 // assume an error means later stages/apps in a pipe had enough input and 67 // quit successfully, so quit successfully too 68 _, err := os.Stdout.WriteString("\n") 69 if err != nil { 70 return 71 } 72 73 // names has all filepaths given, ignoring repetitions 74 names, ok := findAllFiles(deduplicate(os.Args[1:])) 75 if !ok { 76 os.Exit(1) 77 } 78 names = deduplicate(names) 79 if len(names) == 0 { 80 names = []string{`-`} 81 } 82 83 events := make(chan event) 84 go handleInputs(names, events) 85 if !handleOutput(os.Stdout, len(names), events) { 86 os.Exit(1) 87 } 88 } 89 90 // handleInputs launches all the tasks which do the actual work, limiting how 91 // many inputs are being worked on at the same time 92 func handleInputs(names []string, events chan event) { 93 // allow output-reporter task to end, and thus the app 94 defer close(events) 95 96 // permissions limits how many worker tasks can be active at the same 97 // time: when given many filepaths to work on, rate-limiting avoids 98 // a massive number of concurrent tasks which read and process input 99 permissions := make(chan struct{}, runtime.NumCPU()) 100 defer close(permissions) 101 102 var inputs sync.WaitGroup 103 for i := range names { 104 // wait until some concurrency-room is available 105 permissions <- struct{}{} 106 inputs.Add(1) 107 108 go func(i int) { 109 defer inputs.Done() 110 res, err := handleInput(names[i]) 111 events <- event{i, res, err} 112 <-permissions 113 }(i) 114 } 115 116 // wait for all inputs, before closing the `events` channel 117 inputs.Wait() 118 } 119 120 // handleInput handles each work-item for func handleInputs 121 func handleInput(path string) (stats, error) { 122 var res stats 123 res.name = path 124 125 if path == `-` { 126 err := res.updateStats(os.Stdin) 127 return res, err 128 } 129 130 f, err := os.Open(path) 131 if err != nil { 132 res.result = resultError 133 // on windows, file-not-found error messages may mention `CreateFile`, 134 // even when trying to open files in read-only mode 135 return res, errors.New(`can't open file named ` + path) 136 } 137 defer f.Close() 138 139 err = res.updateStats(f) 140 return res, err 141 } 142 143 // handleOutput asynchronously updates output as results are known, whether 144 // it's errors or successful results; returns whether it succeeded, which 145 // means no errors happened 146 func handleOutput(w io.Writer, rescount int, events chan event) (ok bool) { 147 ok = true 148 bw := bufio.NewWriter(w) 149 defer bw.Flush() 150 151 results := make([]stats, rescount) 152 153 // keep track of which tasks are over, so that on each event all leading 154 // results which are ready are shown: all of this ensures prompt output 155 // updates as soon as results come in, while keeping the original order 156 // of the names/filepaths given 157 resultsLeft := results 158 159 for v := range events { 160 results[v.Index] = v.Stats 161 if v.Err != nil { 162 ok = false 163 bw.Flush() 164 showError(v.Err) 165 166 // stay in the current loop, in case this failure was keeping 167 // previous successes from showing up 168 } 169 170 n := countLeadingReady(resultsLeft) 171 172 for _, res := range resultsLeft[:n] { 173 if err := showResult(bw, res); err != nil { 174 // assume later stages/apps in a pipe had enough input and 175 // quit successfully, so quit successfully too 176 return true 177 } 178 } 179 resultsLeft = resultsLeft[n:] 180 181 // flush output-buffer only if anything new was shown 182 if n > 0 { 183 bw.Flush() 184 } 185 } 186 187 return ok 188 } 189 190 // showError standardizes how errors from this app look 191 func showError(err error) { 192 os.Stderr.WriteString("\x1b[31m") 193 os.Stderr.WriteString(err.Error()) 194 os.Stderr.WriteString("\x1b[0m\n") 195 } 196 197 // showResult does what it says 198 func showResult(w *bufio.Writer, res stats) error { 199 if res.result == resultError { 200 return nil 201 } 202 203 var buf [64]byte 204 w.WriteString(res.name) 205 w.Write([]byte{'\t'}) 206 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 207 w.Write([]byte{'\t'}) 208 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 209 w.Write([]byte{'\t'}) 210 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 211 w.Write([]byte{'\t'}) 212 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 213 w.Write([]byte{'\t'}) 214 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 215 w.Write([]byte{'\t'}) 216 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 217 w.Write([]byte{'\t'}) 218 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 219 w.Write([]byte{'\t'}) 220 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 221 w.Write([]byte{'\t'}) 222 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 223 w.Write([]byte{'\t'}) 224 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 225 w.Write([]byte{'\t'}) 226 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 227 _, err := w.Write([]byte{'\n'}) 228 return err 229 } 230 231 // deduplicate ensures items only appear once, keeping the original slice 232 // unchanged 233 func deduplicate(src []string) []string { 234 var unique []string 235 got := make(map[string]struct{}) 236 237 for _, s := range src { 238 if _, ok := got[s]; ok { 239 continue 240 } 241 242 unique = append(unique, s) 243 got[s] = struct{}{} 244 } 245 246 return unique 247 } 248 249 // findAllFiles does what it says, given a mix of file/folder paths, finding 250 // all files recursively in the case of folders 251 func findAllFiles(paths []string) (found []string, ok bool) { 252 var unique []string 253 got := make(map[string]struct{}) 254 ok = true 255 256 for _, root := range paths { 257 // a dash means standard input 258 if root == `-` { 259 if _, ok := got[root]; ok { 260 continue 261 } 262 263 unique = append(unique, root) 264 got[root] = struct{}{} 265 continue 266 } 267 268 _, err := os.Stat(root) 269 if os.IsNotExist(err) { 270 ok = false 271 // on windows, file-not-found error messages may mention `CreateFile`, 272 // even when trying to open files in read-only mode 273 err := errors.New(`can't find file/folder named ` + root) 274 showError(err) 275 continue 276 } 277 278 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 279 if err != nil { 280 return err 281 } 282 283 if d.IsDir() { 284 return nil 285 } 286 287 if _, ok := got[path]; ok { 288 return nil 289 } 290 291 unique = append(unique, path) 292 got[path] = struct{}{} 293 return nil 294 }) 295 296 if err != nil { 297 ok = false 298 showError(err) 299 } 300 } 301 302 return unique, ok 303 } File: ./mit-license.txt 1 The MIT License (MIT) 2 3 Copyright (c) 2026 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the "Software"), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: ./stats.go 1 package main 2 3 import ( 4 "io" 5 ) 6 7 // isZero enables branchless-counting, when xor-compared bytes are used 8 // as indices for it 9 var isZero = [256]byte{1} 10 11 // counter makes it easy to change the int-size of almost all counters 12 type counter int 13 14 // statResult constrains possible result-states/values in type stats 15 type statResult int 16 17 const ( 18 // resultPending is the default not-yet-ready result-status 19 resultPending = statResult(0) 20 21 // resultError signals result should show as an error, instead of data 22 resultError = statResult(1) 23 24 // resultSuccess means result can be shown 25 resultSuccess = statResult(2) 26 ) 27 28 // stats has all the size-stats for some input, as well as a way to 29 // skip showing results, in case of an error such as `file not found` 30 type stats struct { 31 // bytes counts all bytes read 32 bytes int 33 34 // lines counts lines, and is 0 only when the byte-count is also 0 35 lines counter 36 37 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 38 // is usually a complete symbol: `emoji` country-flags are commonly-used 39 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 40 runes counter 41 42 // maxWidth is maximum byte-width of lines, excluding carriage-returns 43 // and/or line-feeds 44 maxWidth counter 45 46 // nulls counts all-bits-off bytes 47 nulls counter 48 49 // fulls counts all-bits-on bytes 50 fulls counter 51 52 // highs counts bytes with their `top` (highest-order) bit on 53 highs counter 54 55 // spaces counts ASCII spaces 56 spaces counter 57 58 // tabs counts ASCII tabs 59 tabs counter 60 61 // trailing counts lines with trailing spaces in them 62 trailing counter 63 64 // lf counts ASCII line-feeds as their own byte-values: this means its 65 // value will always be at least the same as field `crlf` 66 lf counter 67 68 // crlf counts ASCII CRLF byte-pairs 69 crlf counter 70 71 // name is the filepath of the file/source these stats are about 72 name string 73 74 // results keeps track of whether results are valid and/or ready 75 result statResult 76 } 77 78 // updateStats does what it says, reading everything from a reader 79 func (res *stats) updateStats(r io.Reader) error { 80 err := res.updateUsing(r) 81 if err == io.EOF { 82 err = nil 83 } 84 85 if err == nil { 86 res.result = resultSuccess 87 } else { 88 res.result = resultError 89 } 90 return err 91 } 92 93 // updateUsing helps func updateStats do its job 94 func (res *stats) updateUsing(r io.Reader) error { 95 var width counter 96 var prev1, prev2 byte 97 var buf [16 * 1024]byte 98 var tallies [256]uint64 99 100 for { 101 n, err := r.Read(buf[:]) 102 if n < 1 { 103 if err == io.EOF { 104 res.lines = counter(tallies['\n']) 105 res.tabs = counter(tallies['\t']) 106 res.spaces = counter(tallies[' ']) 107 res.lf = counter(tallies['\n']) 108 res.nulls = counter(tallies[0]) 109 res.fulls = counter(tallies[255]) 110 for i := 128; i < 256; i++ { 111 res.highs += counter(tallies[i]) 112 } 113 return res.handleEnd(width, prev1, prev2) 114 } 115 return err 116 } 117 118 res.bytes += n 119 chunk := buf[:n] 120 121 for _, b := range chunk { 122 // count values without branching, because it's fun 123 tallies[b]++ 124 125 // handle non-ASCII runes, assuming input is valid UTF-8 126 res.runes += 1 - count(b&0xc0, 0x80) 127 128 // handle line-feeds 129 if b == '\n' { 130 crlf := count(prev1, '\r') 131 res.crlf += crlf 132 133 // count lines with trailing spaces, whether these end with 134 // a CRLF byte-pair or just a line-feed byte 135 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') { 136 res.trailing++ 137 } 138 139 // exclude any CR from the current line's width-count 140 width -= crlf 141 if res.maxWidth < width { 142 res.maxWidth = width 143 } 144 145 prev2 = prev1 146 prev1 = b 147 width = 0 148 continue 149 } 150 151 prev2 = prev1 152 prev1 = b 153 width++ 154 } 155 } 156 } 157 158 // handleEnd fixes/finalizes stats when input data end; this func is only 159 // meant to be used by func updateStats, since it takes some of the latter's 160 // local variables 161 func (res *stats) handleEnd(width counter, prev1, prev2 byte) error { 162 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') { 163 res.trailing++ 164 } 165 166 if res.maxWidth < width { 167 res.maxWidth = width 168 } 169 170 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 171 // standard cmd-line tool `wc` 172 if res.bytes > 0 && prev1 != '\n' { 173 res.lines++ 174 } 175 176 return nil 177 } 178 179 // count checks if 2 bytes are the same, returning either 0 or 1, which can 180 // be added directly/branchlessly to totals 181 // func count(x, y byte) counter { 182 // return counter(isZero[x^y]) 183 // } 184 185 // count checks if 2 bytes are the same, returning either 0 or 1, which can 186 // be added directly/branchlessly to totals 187 func count(x, y byte) counter { 188 if x != y { 189 return 0 190 } 191 return 1 192 } 193 194 // countLeadingReady finds how many items are ready to show at the start of a 195 // results-slice, which ensures output matches the original item-order 196 func countLeadingReady(values []stats) int { 197 for i, v := range values { 198 if v.result == resultPending { 199 return i 200 } 201 } 202 return len(values) 203 } File: ./stats_test.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]stats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected stats 48 }{ 49 { 50 ``, 51 stats{}, 52 }, 53 { 54 `abc`, 55 stats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 stats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got stats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 }