File: coby/info.txt 1 coby [files/folders...] 2 3 COunt BYtes finds out some simple byte-related stats, counting 4 5 - bytes 6 - lines 7 - how many lines have trailing spaces 8 - how many lines end with a CRLF pair 9 - all-off (0) bytes 10 - all-on (255) bytes 11 - high-bytes (128+) 12 13 The output is TSV (tab-separated values) lines, where the first line has 14 all the column names. 15 16 When no filepaths are given, the standard input is used by default. All 17 folder names given expand recursively into all filenames in them. File: coby/main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strconv" 12 "sync" 13 14 _ "embed" 15 ) 16 17 // Note: the code is avoiding using the fmt package to save hundreds of 18 // kilobytes on the resulting executable, which is a noticeable difference. 19 20 //go:embed info.txt 21 var info string 22 23 // header is the first output line 24 var header = []string{ 25 `name`, 26 `bytes`, 27 `runes`, 28 `lines`, 29 `lf`, 30 `crlf`, 31 `spaces`, 32 `tabs`, 33 `trails`, 34 `nulls`, 35 `fulls`, 36 `highs`, 37 } 38 39 // event has what the output-reporting task needs to show the results of a 40 // task which has just completed, perhaps unsuccessfully 41 type event struct { 42 // Index points to the task's entry in the results-slice 43 Index int 44 45 // Stats has all the byte-related stats 46 Stats stats 47 48 // Err is the completed task's error, or lack of 49 Err error 50 } 51 52 func main() { 53 if len(os.Args) > 1 { 54 switch os.Args[1] { 55 case `-h`, `--h`, `-help`, `--help`: 56 os.Stderr.WriteString(info) 57 return 58 } 59 } 60 61 // show first/heading line right away, to let users know things are 62 // happening 63 for i, s := range header { 64 if i > 0 { 65 os.Stdout.WriteString("\t") 66 } 67 os.Stdout.WriteString(s) 68 } 69 // assume an error means later stages/apps in a pipe had enough input and 70 // quit successfully, so quit successfully too 71 _, err := os.Stdout.WriteString("\n") 72 if err != nil { 73 return 74 } 75 76 // names has all filepaths given, ignoring repetitions 77 names, ok := findAllFiles(deduplicate(os.Args[1:])) 78 if !ok { 79 os.Exit(1) 80 } 81 names = deduplicate(names) 82 if len(names) == 0 { 83 names = []string{`-`} 84 } 85 86 events := make(chan event) 87 go handleInputs(names, events) 88 if !handleOutput(os.Stdout, len(names), events) { 89 os.Exit(1) 90 } 91 } 92 93 // handleInputs launches all the tasks which do the actual work, limiting how 94 // many inputs are being worked on at the same time 95 func handleInputs(names []string, events chan event) { 96 // allow output-reporter task to end, and thus the app 97 defer close(events) 98 99 // permissions limits how many worker tasks can be active at the same 100 // time: when given many filepaths to work on, rate-limiting avoids 101 // a massive number of concurrent tasks which read and process input 102 permissions := make(chan struct{}, runtime.NumCPU()) 103 defer close(permissions) 104 105 var inputs sync.WaitGroup 106 for i := range names { 107 // wait until some concurrency-room is available 108 permissions <- struct{}{} 109 inputs.Add(1) 110 111 go func(i int) { 112 defer inputs.Done() 113 res, err := handleInput(names[i]) 114 events <- event{i, res, err} 115 <-permissions 116 }(i) 117 } 118 119 // wait for all inputs, before closing the `events` channel 120 inputs.Wait() 121 } 122 123 // handleInput handles each work-item for func handleInputs 124 func handleInput(path string) (stats, error) { 125 var res stats 126 res.name = path 127 128 if path == `-` { 129 err := res.updateStats(os.Stdin) 130 return res, err 131 } 132 133 f, err := os.Open(path) 134 if err != nil { 135 res.result = resultError 136 // on windows, file-not-found error messages may mention `CreateFile`, 137 // even when trying to open files in read-only mode 138 return res, errors.New(`can't open file named ` + path) 139 } 140 defer f.Close() 141 142 err = res.updateStats(f) 143 return res, err 144 } 145 146 // handleOutput asynchronously updates output as results are known, whether 147 // it's errors or successful results; returns whether it succeeded, which 148 // means no errors happened 149 func handleOutput(w io.Writer, rescount int, events chan event) (ok bool) { 150 ok = true 151 bw := bufio.NewWriter(w) 152 defer bw.Flush() 153 154 results := make([]stats, rescount) 155 156 // keep track of which tasks are over, so that on each event all leading 157 // results which are ready are shown: all of this ensures prompt output 158 // updates as soon as results come in, while keeping the original order 159 // of the names/filepaths given 160 resultsLeft := results 161 162 for v := range events { 163 results[v.Index] = v.Stats 164 if v.Err != nil { 165 ok = false 166 bw.Flush() 167 showError(v.Err) 168 169 // stay in the current loop, in case this failure was keeping 170 // previous successes from showing up 171 } 172 173 n := countLeadingReady(resultsLeft) 174 175 for _, res := range resultsLeft[:n] { 176 if err := showResult(bw, res); err != nil { 177 // assume later stages/apps in a pipe had enough input and 178 // quit successfully, so quit successfully too 179 return true 180 } 181 } 182 resultsLeft = resultsLeft[n:] 183 184 // flush output-buffer only if anything new was shown 185 if n > 0 { 186 bw.Flush() 187 } 188 } 189 190 return ok 191 } 192 193 // showError standardizes how errors from this app look 194 func showError(err error) { 195 os.Stderr.WriteString("\x1b[31m") 196 os.Stderr.WriteString(err.Error()) 197 os.Stderr.WriteString("\x1b[0m\n") 198 } 199 200 // showResult does what it says 201 func showResult(w *bufio.Writer, res stats) error { 202 if res.result == resultError { 203 return nil 204 } 205 206 var buf [64]byte 207 w.WriteString(res.name) 208 w.Write([]byte{'\t'}) 209 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 210 w.Write([]byte{'\t'}) 211 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 212 w.Write([]byte{'\t'}) 213 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 214 w.Write([]byte{'\t'}) 215 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 216 w.Write([]byte{'\t'}) 217 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 218 w.Write([]byte{'\t'}) 219 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 220 w.Write([]byte{'\t'}) 221 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 222 w.Write([]byte{'\t'}) 223 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 224 w.Write([]byte{'\t'}) 225 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 226 w.Write([]byte{'\t'}) 227 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 228 w.Write([]byte{'\t'}) 229 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 230 _, err := w.Write([]byte{'\n'}) 231 return err 232 } 233 234 // deduplicate ensures items only appear once, keeping the original slice 235 // unchanged 236 func deduplicate(src []string) []string { 237 var unique []string 238 got := make(map[string]struct{}) 239 240 for _, s := range src { 241 if _, ok := got[s]; ok { 242 continue 243 } 244 245 unique = append(unique, s) 246 got[s] = struct{}{} 247 } 248 249 return unique 250 } 251 252 // findAllFiles does what it says, given a mix of file/folder paths, finding 253 // all files recursively in the case of folders 254 func findAllFiles(paths []string) (found []string, ok bool) { 255 var unique []string 256 got := make(map[string]struct{}) 257 ok = true 258 259 for _, root := range paths { 260 // a dash means standard input 261 if root == `-` { 262 if _, ok := got[root]; ok { 263 continue 264 } 265 266 unique = append(unique, root) 267 got[root] = struct{}{} 268 continue 269 } 270 271 _, err := os.Stat(root) 272 if os.IsNotExist(err) { 273 ok = false 274 // on windows, file-not-found error messages may mention `CreateFile`, 275 // even when trying to open files in read-only mode 276 err := errors.New(`can't find file/folder named ` + root) 277 showError(err) 278 continue 279 } 280 281 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 282 if err != nil { 283 return err 284 } 285 286 if d.IsDir() { 287 return nil 288 } 289 290 if _, ok := got[path]; ok { 291 return nil 292 } 293 294 unique = append(unique, path) 295 got[path] = struct{}{} 296 return nil 297 }) 298 299 if err != nil { 300 ok = false 301 showError(err) 302 } 303 } 304 305 return unique, ok 306 } File: coby/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2025 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: coby/stats.go 1 package main 2 3 import ( 4 "io" 5 ) 6 7 // isZero enables branchless-counting, when xor-compared bytes are used 8 // as indices for it 9 var isZero = [256]byte{1} 10 11 // counter makes it easy to change the int-size of almost all counters 12 type counter int 13 14 // statResult constrains possible result-states/values in type stats 15 type statResult int 16 17 const ( 18 // resultPending is the default not-yet-ready result-status 19 resultPending = statResult(0) 20 21 // resultError signals result should show as an error, instead of data 22 resultError = statResult(1) 23 24 // resultSuccess means result can be shown 25 resultSuccess = statResult(2) 26 ) 27 28 // stats has all the size-stats for some input, as well as a way to 29 // skip showing results, in case of an error such as `file not found` 30 type stats struct { 31 // bytes counts all bytes read 32 bytes int 33 34 // lines counts lines, and is 0 only when the byte-count is also 0 35 lines counter 36 37 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 38 // is usually a complete symbol: `emoji` country-flags are commonly-used 39 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 40 runes counter 41 42 // maxWidth is maximum byte-width of lines, excluding carriage-returns 43 // and/or line-feeds 44 maxWidth counter 45 46 // nulls counts all-bits-off bytes 47 nulls counter 48 49 // fulls counts all-bits-on bytes 50 fulls counter 51 52 // highs counts bytes with their `top` (highest-order) bit on 53 highs counter 54 55 // spaces counts ASCII spaces 56 spaces counter 57 58 // tabs counts ASCII tabs 59 tabs counter 60 61 // trailing counts lines with trailing spaces in them 62 trailing counter 63 64 // lf counts ASCII line-feeds as their own byte-values: this means its 65 // value will always be at least the same as field `crlf` 66 lf counter 67 68 // crlf counts ASCII CRLF byte-pairs 69 crlf counter 70 71 // name is the filepath of the file/source these stats are about 72 name string 73 74 // results keeps track of whether results are valid and/or ready 75 result statResult 76 } 77 78 // updateStats does what it says, reading everything from a reader 79 func (res *stats) updateStats(r io.Reader) error { 80 err := res.updateUsing(r) 81 if err == io.EOF { 82 err = nil 83 } 84 85 if err == nil { 86 res.result = resultSuccess 87 } else { 88 res.result = resultError 89 } 90 return err 91 } 92 93 // updateUsing helps func updateStats do its job 94 func (res *stats) updateUsing(r io.Reader) error { 95 var width counter 96 var prev1, prev2 byte 97 var buf [16 * 1024]byte 98 var tallies [256]uint64 99 100 for { 101 n, err := r.Read(buf[:]) 102 if n < 1 { 103 if err == io.EOF { 104 res.lines = counter(tallies['\n']) 105 res.tabs = counter(tallies['\t']) 106 res.spaces = counter(tallies[' ']) 107 res.lf = counter(tallies['\n']) 108 res.nulls = counter(tallies[0]) 109 res.fulls = counter(tallies[255]) 110 for i := 128; i < 256; i++ { 111 res.highs += counter(tallies[i]) 112 } 113 return res.handleEnd(width, prev1, prev2) 114 } 115 return err 116 } 117 118 res.bytes += n 119 chunk := buf[:n] 120 121 for _, b := range chunk { 122 // count values without branching, because it's fun 123 tallies[b]++ 124 125 // handle non-ASCII runes, assuming input is valid UTF-8 126 res.runes += 1 - count(b&0xc0, 0x80) 127 128 // handle line-feeds 129 if b == '\n' { 130 crlf := count(prev1, '\r') 131 res.crlf += crlf 132 133 // count lines with trailing spaces, whether these end with 134 // a CRLF byte-pair or just a line-feed byte 135 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') { 136 res.trailing++ 137 } 138 139 // exclude any CR from the current line's width-count 140 width -= crlf 141 if res.maxWidth < width { 142 res.maxWidth = width 143 } 144 145 prev2 = prev1 146 prev1 = b 147 width = 0 148 continue 149 } 150 151 prev2 = prev1 152 prev1 = b 153 width++ 154 } 155 } 156 } 157 158 // handleEnd fixes/finalizes stats when input data end; this func is only 159 // meant to be used by func updateStats, since it takes some of the latter's 160 // local variables 161 func (res *stats) handleEnd(width counter, prev1, prev2 byte) error { 162 if prev1 == ' ' || (prev2 == ' ' && prev1 == '\r') { 163 res.trailing++ 164 } 165 166 if res.maxWidth < width { 167 res.maxWidth = width 168 } 169 170 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 171 // standard cmd-line tool `wc` 172 if res.bytes > 0 && prev1 != '\n' { 173 res.lines++ 174 } 175 176 return nil 177 } 178 179 // count checks if 2 bytes are the same, returning either 0 or 1, which can 180 // be added directly/branchlessly to totals 181 // func count(x, y byte) counter { 182 // return counter(isZero[x^y]) 183 // } 184 185 // count checks if 2 bytes are the same, returning either 0 or 1, which can 186 // be added directly/branchlessly to totals 187 func count(x, y byte) counter { 188 if x != y { 189 return 0 190 } 191 return 1 192 } 193 194 // countLeadingReady finds how many items are ready to show at the start of a 195 // results-slice, which ensures output matches the original item-order 196 func countLeadingReady(values []stats) int { 197 for i, v := range values { 198 if v.result == resultPending { 199 return i 200 } 201 } 202 return len(values) 203 } File: coby/stats_test.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]stats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected stats 48 }{ 49 { 50 ``, 51 stats{}, 52 }, 53 { 54 `abc`, 55 stats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 stats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got stats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 }