File: coby/info.txt 1 coby [files/folders...] 2 3 COunt BYtes finds out some simple byte-related stats, counting 4 5 - bytes 6 - lines 7 - how many lines have trailing spaces 8 - how many lines end with a CRLF pair 9 - all-off (0) bytes 10 - all-on (255) bytes 11 - high-bytes (128+) 12 13 The output is TSV (tab-separated values) lines, where the first line has 14 all the column names. 15 16 When no filepaths are given, the standard input is used by default. All 17 folder names given expand recursively into all filenames in them. File: coby/main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strconv" 12 "sync" 13 14 _ "embed" 15 ) 16 17 // Note: the code is avoiding using the fmt package to save hundreds of 18 // kilobytes on the resulting executable, which is a noticeable difference. 19 20 //go:embed info.txt 21 var info string 22 23 // header is the first output line 24 var header = []string{ 25 `name`, 26 `bytes`, 27 `runes`, 28 `lines`, 29 `lf`, 30 `crlf`, 31 `spaces`, 32 `tabs`, 33 `trails`, 34 `nulls`, 35 `fulls`, 36 `highs`, 37 } 38 39 // event has what the output-reporting task needs to show the results of a 40 // task which has just completed, perhaps unsuccessfully 41 type event struct { 42 // Index points to the task's entry in the results-slice 43 Index int 44 45 // Stats has all the byte-related stats 46 Stats stats 47 48 // Err is the completed task's error, or lack of 49 Err error 50 } 51 52 func main() { 53 if len(os.Args) > 1 { 54 switch os.Args[1] { 55 case `-h`, `--h`, `-help`, `--help`: 56 os.Stderr.WriteString(info) 57 return 58 } 59 } 60 61 // show first/heading line right away, to let users know things are 62 // happening 63 for i, s := range header { 64 if i > 0 { 65 os.Stdout.WriteString("\t") 66 } 67 os.Stdout.WriteString(s) 68 } 69 // assume an error means later stages/apps in a pipe had enough input and 70 // quit successfully, so quit successfully too 71 _, err := os.Stdout.WriteString("\n") 72 if err != nil { 73 return 74 } 75 76 // names has all filepaths given, ignoring repetitions 77 names, ok := findAllFiles(unique(os.Args[1:])) 78 if !ok { 79 os.Exit(1) 80 } 81 if len(names) == 0 { 82 names = []string{`-`} 83 } 84 85 events := make(chan event) 86 go handleInputs(names, events) 87 if !handleOutput(os.Stdout, len(names), events) { 88 os.Exit(1) 89 } 90 } 91 92 // handleInputs launches all the tasks which do the actual work, limiting how 93 // many inputs are being worked on at the same time 94 func handleInputs(names []string, events chan event) { 95 // allow output-reporter task to end, and thus the app 96 defer close(events) 97 98 // permissions limits how many worker tasks can be active at the same 99 // time: when given many filepaths to work on, rate-limiting avoids 100 // a massive number of concurrent tasks which read and process input 101 permissions := make(chan struct{}, runtime.NumCPU()) 102 defer close(permissions) 103 104 var inputs sync.WaitGroup 105 for i := range names { 106 // wait until some concurrency-room is available 107 permissions <- struct{}{} 108 inputs.Add(1) 109 110 go func(i int) { 111 defer inputs.Done() 112 res, err := handleInput(names[i]) 113 events <- event{i, res, err} 114 <-permissions 115 }(i) 116 } 117 118 // wait for all inputs, before closing the `events` channel 119 inputs.Wait() 120 } 121 122 // handleInput handles each work-item for func handleInputs 123 func handleInput(path string) (stats, error) { 124 var res stats 125 res.name = path 126 127 if path == `-` { 128 err := res.updateStats(os.Stdin) 129 return res, err 130 } 131 132 f, err := os.Open(path) 133 if err != nil { 134 res.result = resultError 135 // on windows, file-not-found error messages may mention `CreateFile`, 136 // even when trying to open files in read-only mode 137 return res, errors.New(`can't open file named ` + path) 138 } 139 defer f.Close() 140 141 err = res.updateStats(f) 142 return res, err 143 } 144 145 // handleOutput asynchronously updates output as results are known, whether 146 // it's errors or successful results; returns whether it succeeded, which 147 // means no errors happened 148 func handleOutput(w io.Writer, rescount int, events chan event) (ok bool) { 149 bw := bufio.NewWriter(w) 150 defer bw.Flush() 151 152 results := make([]stats, rescount) 153 154 // keep track of which tasks are over, so that on each event all leading 155 // results which are ready are shown: all of this ensures prompt output 156 // updates as soon as results come in, while keeping the original order 157 // of the names/filepaths given 158 resultsLeft := results 159 160 for v := range events { 161 results[v.Index] = v.Stats 162 if v.Err != nil { 163 ok = false 164 bw.Flush() 165 showError(v.Err) 166 167 // stay in the current loop, in case this failure was keeping 168 // previous successes from showing up 169 } 170 171 n := countLeadingReady(resultsLeft) 172 173 for _, res := range resultsLeft[:n] { 174 if err := showResult(bw, res); err != nil { 175 // assume later stages/apps in a pipe had enough input and 176 // quit successfully, so quit successfully too 177 return true 178 } 179 } 180 resultsLeft = resultsLeft[n:] 181 182 // flush output-buffer only if anything new was shown 183 if n > 0 { 184 bw.Flush() 185 } 186 } 187 188 return ok 189 } 190 191 // showError standardizes how errors from this app look 192 func showError(err error) { 193 os.Stderr.WriteString("\x1b[31m") 194 os.Stderr.WriteString(err.Error()) 195 os.Stderr.WriteString("\x1b[0m\n") 196 } 197 198 // showResult does what it says 199 func showResult(w *bufio.Writer, res stats) error { 200 if res.result == resultError { 201 return nil 202 } 203 204 var buf [64]byte 205 w.WriteString(res.name) 206 w.Write([]byte{'\t'}) 207 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 208 w.Write([]byte{'\t'}) 209 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 210 w.Write([]byte{'\t'}) 211 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 212 w.Write([]byte{'\t'}) 213 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 214 w.Write([]byte{'\t'}) 215 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 216 w.Write([]byte{'\t'}) 217 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 218 w.Write([]byte{'\t'}) 219 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 220 w.Write([]byte{'\t'}) 221 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 222 w.Write([]byte{'\t'}) 223 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 224 w.Write([]byte{'\t'}) 225 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 226 w.Write([]byte{'\t'}) 227 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 228 _, err := w.Write([]byte{'\n'}) 229 return err 230 } 231 232 // unique ensures items only appear once in the result, keeping the original 233 // slice unchanged 234 func unique(src []string) []string { 235 var unique []string 236 got := make(map[string]struct{}) 237 for _, s := range src { 238 if _, ok := got[s]; ok { 239 continue 240 } 241 unique = append(unique, s) 242 got[s] = struct{}{} 243 } 244 return unique 245 } 246 247 // findAllFiles does what it says, given a mix of file/folder paths, finding 248 // all files recursively in the case of folders 249 func findAllFiles(paths []string) (found []string, ok bool) { 250 var unique []string 251 got := make(map[string]struct{}) 252 ok = true 253 254 for _, root := range paths { 255 // a dash means standard input 256 if root == `-` { 257 if _, ok := got[root]; ok { 258 continue 259 } 260 261 unique = append(unique, root) 262 got[root] = struct{}{} 263 continue 264 } 265 266 _, err := os.Stat(root) 267 if os.IsNotExist(err) { 268 ok = false 269 // on windows, file-not-found error messages may mention `CreateFile`, 270 // even when trying to open files in read-only mode 271 err := errors.New(`can't find file/folder named ` + root) 272 showError(err) 273 continue 274 } 275 276 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 277 if err != nil { 278 return err 279 } 280 281 if d.IsDir() { 282 return nil 283 } 284 285 if _, ok := got[path]; ok { 286 return nil 287 } 288 289 unique = append(unique, path) 290 got[path] = struct{}{} 291 return nil 292 }) 293 294 if err != nil { 295 ok = false 296 showError(err) 297 } 298 } 299 300 return unique, ok 301 } File: coby/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2024 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: coby/stats.go 1 package main 2 3 import ( 4 "io" 5 ) 6 7 // isZero enables branchless-counting, when xor-compared bytes are used 8 // as indices for it 9 var isZero = [256]byte{1} 10 11 // counter makes it easy to change the int-size of almost all counters 12 type counter int 13 14 // statResult constrains possible result-states/values in type stats 15 type statResult int 16 17 const ( 18 // resultPending is the default not-yet-ready result-status 19 resultPending = statResult(0) 20 21 // resultError signals result should show as an error, instead of data 22 resultError = statResult(1) 23 24 // resultSuccess means result can be shown 25 resultSuccess = statResult(2) 26 ) 27 28 // stats has all the size-stats for some input, as well as a way to 29 // skip showing results, in case of an error such as `file not found` 30 type stats struct { 31 // bytes counts all bytes read 32 bytes int 33 34 // lines counts lines, and is 0 only when the byte-count is also 0 35 lines counter 36 37 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 38 // is usually a complete symbol: `emoji` country-flags are commonly-used 39 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 40 runes counter 41 42 // maxWidth is maximum byte-width of lines, excluding carriage-returns 43 // and/or line-feeds 44 maxWidth counter 45 46 // nulls counts all-bits-off bytes 47 nulls counter 48 49 // fulls counts all-bits-on bytes 50 fulls counter 51 52 // highs counts bytes with their `top` (highest-order) bit on 53 highs counter 54 55 // spaces counts ASCII spaces 56 spaces counter 57 58 // tabs counts ASCII tabs 59 tabs counter 60 61 // trailing counts lines with trailing spaces in them 62 trailing counter 63 64 // lf counts ASCII line-feeds as their own byte-values: this means its 65 // value will always be at least the same as field `crlf` 66 lf counter 67 68 // crlf counts ASCII CRLF byte-pairs 69 crlf counter 70 71 // name is the filepath of the file/source these stats are about 72 name string 73 74 // results keeps track of whether results are valid and/or ready 75 result statResult 76 } 77 78 // updateStats does what it says, reading everything from a reader 79 func (res *stats) updateStats(r io.Reader) error { 80 err := res.updateUsing(r) 81 if err == io.EOF { 82 err = nil 83 } 84 85 if err == nil { 86 res.result = resultSuccess 87 } else { 88 res.result = resultError 89 } 90 return err 91 } 92 93 // updateUsing helps func updateStats do its job 94 func (res *stats) updateUsing(r io.Reader) error { 95 var width counter 96 var highRun int 97 var prev1, prev2 byte 98 var buf [16 * 1024]byte 99 var tallies [256]uint64 100 101 for { 102 n, err := r.Read(buf[:]) 103 if n < 1 { 104 if err == io.EOF { 105 res.tabs = counter(tallies['\t']) 106 res.spaces = counter(tallies[' ']) 107 res.lf = counter(tallies['\n']) 108 res.nulls = counter(tallies[0]) 109 res.fulls = counter(tallies[255]) 110 for i := 128; i < 256; i++ { 111 res.highs += counter(tallies[i]) 112 } 113 return res.handleEnd(width, prev1, highRun) 114 } 115 return err 116 } 117 118 res.bytes += n 119 chunk := buf[:n] 120 121 for _, b := range chunk { 122 // count values without branching, because it's fun 123 tallies[b]++ 124 125 // handle non-ASCII runes, assuming input is valid UTF-8 126 if b >= 128 { 127 if highRun < 3 { 128 highRun++ 129 } else { 130 highRun = 0 131 res.runes++ 132 width++ 133 } 134 135 prev2 = prev1 136 prev1 = b 137 continue 138 } 139 140 // handle line-feeds 141 if b == '\n' { 142 res.lines++ 143 144 crlf := count(prev1, '\r') 145 res.crlf += crlf 146 147 // count lines with trailing spaces, whether these end with 148 // a CRLF byte-pair or just a line-feed byte 149 res.trailing += count(prev1, ' ') 150 res.trailing += crlf & count(prev2, ' ') 151 152 // exclude any CR from the current line's width-count 153 width -= crlf 154 if res.maxWidth < width { 155 res.maxWidth = width 156 } 157 158 prev2 = prev1 159 prev1 = b 160 161 res.runes++ 162 highRun = 0 163 width = 0 164 continue 165 } 166 167 prev2 = prev1 168 prev1 = b 169 170 res.runes++ 171 highRun = 0 172 width++ 173 } 174 } 175 } 176 177 // handleEnd fixes/finalizes stats when input data end; this func is only 178 // meant to be used by func updateStats, since it takes some of the latter's 179 // local variables 180 func (res *stats) handleEnd(width counter, prev1 byte, highRun int) error { 181 if prev1 == ' ' { 182 res.trailing++ 183 } 184 185 if res.maxWidth < width { 186 res.maxWidth = width 187 } 188 189 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 190 // standard cmd-line tool `wc` 191 if res.bytes > 0 && prev1 != '\n' { 192 res.lines++ 193 } 194 195 if highRun > 0 { 196 res.runes++ 197 } 198 return nil 199 } 200 201 // count checks if 2 bytes are the same, returning either 0 or 1, which can 202 // be added directly/branchlessly to totals 203 func count(x, y byte) counter { 204 return counter(isZero[x^y]) 205 } 206 207 // countLeadingReady finds how many items are ready to show at the start of a 208 // results-slice, which ensures output matches the original item-order 209 func countLeadingReady(values []stats) int { 210 for i, v := range values { 211 if v.result == resultPending { 212 return i 213 } 214 } 215 return len(values) 216 } File: coby/stats_test.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]stats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected stats 48 }{ 49 { 50 ``, 51 stats{}, 52 }, 53 { 54 `abc`, 55 stats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 stats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got stats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 }