File: coby/go.mod 1 module coby 2 3 go 1.18 File: coby/info.txt 1 coby [files/folders...] 2 3 COunt BYtes finds out some simple byte-related stats, counting 4 5 - bytes 6 - lines 7 - how many lines have trailing spaces 8 - how many lines end with a CRLF pair 9 - all-off (0) bytes 10 - all-on (255) bytes 11 - high-bytes (128+) 12 13 The output is TSV (tab-separated values) lines, where the first line has 14 all the column names. 15 16 When no filepaths are given, the standard input is used by default. All 17 folder names given expand recursively into all filenames in them. File: coby/main.go 1 package main 2 3 import ( 4 "bufio" 5 "errors" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strconv" 12 "sync" 13 14 _ "embed" 15 ) 16 17 // Note: the code is avoiding using the fmt package to save hundreds of 18 // kilobytes on the resulting executable, which is a noticeable difference. 19 20 //go:embed info.txt 21 var info string 22 23 // header is the first output line 24 var header = []string{ 25 `name`, 26 `bytes`, 27 `runes`, 28 `lines`, 29 `lf`, 30 `crlf`, 31 `spaces`, 32 `tabs`, 33 `trails`, 34 `nulls`, 35 `fulls`, 36 `highs`, 37 } 38 39 // event has what the output-reporting task needs to show the results of a 40 // task which has just completed, perhaps unsuccessfully 41 type event struct { 42 // Index points to the task's entry in the results-slice 43 Index int 44 45 // Err is the completed task's error, or lack of 46 Err error 47 } 48 49 func main() { 50 if len(os.Args) > 1 { 51 switch os.Args[1] { 52 case `-h`, `--h`, `-help`, `--help`: 53 os.Stderr.WriteString(info) 54 return 55 } 56 } 57 58 // show first/heading line right away, to let users know things are 59 // happening 60 for i, s := range header { 61 if i > 0 { 62 os.Stdout.WriteString("\t") 63 } 64 os.Stdout.WriteString(s) 65 } 66 // assume an error means later stages/apps in a pipe had enough input and 67 // quit successfully, so quit successfully too 68 _, err := os.Stdout.WriteString("\n") 69 if err != nil { 70 return 71 } 72 73 // names has all filepaths given, ignoring repetitions 74 names, ok := findAllFiles(unique(os.Args[1:])) 75 if !ok { 76 os.Exit(1) 77 } 78 if len(names) == 0 { 79 names = []string{`-`} 80 } 81 82 // results has all its items updated concurrently: this is safe to do, 83 // as the tasks update values in separate indices of this slice, and 84 // when an item is ready to show, its values aren't changing anymore 85 results := make([]stats, len(names)) 86 87 events := make(chan event) 88 go handleInputs(names, results, events) 89 if !handleOutput(os.Stdout, results, events) { 90 os.Exit(1) 91 } 92 } 93 94 // handleInputs launches all the tasks which do the actual work, limiting how 95 // many inputs are being worked on at the same time 96 func handleInputs(names []string, results []stats, events chan event) { 97 // allow output-reporter task to end, and thus the app 98 defer close(events) 99 100 // permissions limits how many worker tasks can be active at the same 101 // time: when given many filepaths to work on, rate-limiting avoids 102 // a massive number of concurrent tasks which read and process input 103 permissions := make(chan struct{}, runtime.NumCPU()) 104 defer close(permissions) 105 106 var inputs sync.WaitGroup 107 for i := range names { 108 // wait until some concurrency-room is available 109 permissions <- struct{}{} 110 inputs.Add(1) 111 112 go func(i int) { 113 defer inputs.Done() 114 err := handleInput(&results[i], names[i]) 115 events <- event{i, err} 116 <-permissions 117 }(i) 118 } 119 120 // wait for all inputs, before closing the `events` channel 121 inputs.Wait() 122 } 123 124 // handleInput handles each work-item for func handleInputs 125 func handleInput(res *stats, path string) error { 126 res.name = path 127 128 if path == `-` { 129 return res.updateStats(os.Stdin) 130 } 131 132 f, err := os.Open(path) 133 if err != nil { 134 res.result = resultError 135 // on windows, file-not-found error messages may mention `CreateFile`, 136 // even when trying to open files in read-only mode 137 return errors.New(`can't open file named ` + path) 138 } 139 defer f.Close() 140 141 return res.updateStats(f) 142 } 143 144 // handleOutput asynchronously updates output as results are known, whether 145 // it's errors or successful results; returns whether it succeeded, which 146 // means no errors happened 147 func handleOutput(w io.Writer, results []stats, events chan event) (ok bool) { 148 bw := bufio.NewWriter(w) 149 defer bw.Flush() 150 151 bw.Flush() 152 153 // keep track of which tasks are over, so that on each event all leading 154 // results which are ready are shown: all of this ensures prompt output 155 // updates as soon as results come in, while keeping the original order 156 // of the names/filepaths given 157 resultsLeft := results 158 159 for v := range events { 160 if v.Err != nil { 161 ok = false 162 bw.Flush() 163 showError(v.Err) 164 165 // stay in the current loop, in case this failure was keeping 166 // previous successes from showing up 167 } 168 169 n := countLeadingReady(resultsLeft) 170 171 for _, res := range resultsLeft[:n] { 172 if err := showResult(bw, res); err != nil { 173 // assume later stages/apps in a pipe had enough input and 174 // quit successfully, so quit successfully too 175 return true 176 } 177 } 178 resultsLeft = resultsLeft[n:] 179 180 // flush output-buffer only if anything new was shown 181 if n > 0 { 182 bw.Flush() 183 } 184 } 185 186 return ok 187 } 188 189 // showError standardizes how errors from this app look 190 func showError(err error) { 191 os.Stderr.WriteString("\x1b[31m") 192 os.Stderr.WriteString(err.Error()) 193 os.Stderr.WriteString("\x1b[0m\n") 194 } 195 196 // showResult does what it says 197 func showResult(w *bufio.Writer, res stats) error { 198 if res.result == resultError { 199 return nil 200 } 201 202 var buf [64]byte 203 w.WriteString(res.name) 204 w.Write([]byte{'\t'}) 205 w.Write(strconv.AppendUint(buf[:0], uint64(res.bytes), 10)) 206 w.Write([]byte{'\t'}) 207 w.Write(strconv.AppendUint(buf[:0], uint64(res.runes), 10)) 208 w.Write([]byte{'\t'}) 209 w.Write(strconv.AppendUint(buf[:0], uint64(res.lines), 10)) 210 w.Write([]byte{'\t'}) 211 w.Write(strconv.AppendUint(buf[:0], uint64(res.lf), 10)) 212 w.Write([]byte{'\t'}) 213 w.Write(strconv.AppendUint(buf[:0], uint64(res.crlf), 10)) 214 w.Write([]byte{'\t'}) 215 w.Write(strconv.AppendUint(buf[:0], uint64(res.spaces), 10)) 216 w.Write([]byte{'\t'}) 217 w.Write(strconv.AppendUint(buf[:0], uint64(res.tabs), 10)) 218 w.Write([]byte{'\t'}) 219 w.Write(strconv.AppendUint(buf[:0], uint64(res.trailing), 10)) 220 w.Write([]byte{'\t'}) 221 w.Write(strconv.AppendUint(buf[:0], uint64(res.nulls), 10)) 222 w.Write([]byte{'\t'}) 223 w.Write(strconv.AppendUint(buf[:0], uint64(res.fulls), 10)) 224 w.Write([]byte{'\t'}) 225 w.Write(strconv.AppendUint(buf[:0], uint64(res.highs), 10)) 226 _, err := w.Write([]byte{'\n'}) 227 return err 228 } 229 230 // unique ensures items only appear once in the result, keeping the original 231 // slice unchanged 232 func unique(src []string) []string { 233 var unique []string 234 got := make(map[string]struct{}) 235 for _, s := range src { 236 if _, ok := got[s]; ok { 237 continue 238 } 239 unique = append(unique, s) 240 got[s] = struct{}{} 241 } 242 return unique 243 } 244 245 // findAllFiles does what it says, given a mix of file/folder paths, finding 246 // all files recursively in the case of folders 247 func findAllFiles(paths []string) (found []string, ok bool) { 248 var unique []string 249 got := make(map[string]struct{}) 250 ok = true 251 252 for _, root := range paths { 253 // a dash means standard input 254 if root == `-` { 255 if _, ok := got[root]; ok { 256 continue 257 } 258 259 unique = append(unique, root) 260 got[root] = struct{}{} 261 continue 262 } 263 264 _, err := os.Stat(root) 265 if os.IsNotExist(err) { 266 ok = false 267 // on windows, file-not-found error messages may mention `CreateFile`, 268 // even when trying to open files in read-only mode 269 err := errors.New(`can't find file/folder named ` + root) 270 showError(err) 271 continue 272 } 273 274 err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { 275 if err != nil { 276 return err 277 } 278 279 if d.IsDir() { 280 return nil 281 } 282 283 if _, ok := got[path]; ok { 284 return nil 285 } 286 287 unique = append(unique, path) 288 got[path] = struct{}{} 289 return nil 290 }) 291 292 if err != nil { 293 ok = false 294 showError(err) 295 } 296 } 297 298 return unique, ok 299 } File: coby/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2024 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: coby/stats.go 1 package main 2 3 import ( 4 "io" 5 ) 6 7 // isZero enables branchless-counting, when xor-compared bytes are used 8 // as indices for it 9 var isZero = [256]byte{1} 10 11 // counter makes it easy to change the int-size of almost all counters 12 type counter int 13 14 // statResult constrains possible result-states/values in type stats 15 type statResult int 16 17 const ( 18 // resultPending is the default not-yet-ready result-status 19 resultPending = statResult(0) 20 21 // resultError signals result should show as an error, instead of data 22 resultError = statResult(1) 23 24 // resultSuccess means result can be shown 25 resultSuccess = statResult(2) 26 ) 27 28 // stats has all the size-stats for some input, as well as a way to 29 // skip showing results, in case of an error such as `file not found` 30 type stats struct { 31 // bytes counts all bytes read 32 bytes int 33 34 // lines counts lines, and is 0 only when the byte-count is also 0 35 lines counter 36 37 // runes counts utf-8 sequences, each of which can use up to 4 bytes and 38 // is usually a complete symbol: `emoji` country-flags are commonly-used 39 // counter-examples, as these `symbols` need 2 runes, using 8 bytes each 40 runes counter 41 42 // maxWidth is maximum byte-width of lines, excluding carriage-returns 43 // and/or line-feeds 44 maxWidth counter 45 46 // nulls counts all-bits-off bytes 47 nulls counter 48 49 // fulls counts all-bits-on bytes 50 fulls counter 51 52 // highs counts bytes with their `top` (highest-order) bit on 53 highs counter 54 55 // spaces counts ASCII spaces 56 spaces counter 57 58 // tabs counts ASCII tabs 59 tabs counter 60 61 // trailing counts lines with trailing spaces in them 62 trailing counter 63 64 // lf counts ASCII line-feeds as their own byte-values: this means its 65 // value will always be at least the same as field `crlf` 66 lf counter 67 68 // crlf counts ASCII CRLF byte-pairs 69 crlf counter 70 71 // name is the filepath of the file/source these stats are about 72 name string 73 74 // results keeps track of whether results are valid and/or ready 75 result statResult 76 } 77 78 // updateStats does what it says, reading everything from a reader 79 func (res *stats) updateStats(r io.Reader) error { 80 err := res.updateUsing(r) 81 if err == io.EOF { 82 err = nil 83 } 84 85 if err == nil { 86 res.result = resultSuccess 87 } else { 88 res.result = resultError 89 } 90 return err 91 } 92 93 // updateUsing helps func updateStats do its job 94 func (res *stats) updateUsing(r io.Reader) error { 95 var width counter 96 var highRun int 97 var prev1, prev2 byte 98 var buf [16 * 1024]byte 99 var tallies [256]uint64 100 101 for { 102 n, err := r.Read(buf[:]) 103 if n < 1 { 104 if err == io.EOF { 105 res.tabs = counter(tallies['\t']) 106 res.spaces = counter(tallies[' ']) 107 res.lf = counter(tallies['\n']) 108 res.nulls = counter(tallies[0]) 109 res.fulls = counter(tallies[255]) 110 for i := 128; i < 256; i++ { 111 res.highs += counter(tallies[i]) 112 } 113 return res.handleEnd(width, prev1, highRun) 114 } 115 return err 116 } 117 118 res.bytes += n 119 chunk := buf[:n] 120 121 for _, b := range chunk { 122 // count values without branching, because it's fun 123 tallies[b]++ 124 125 // handle non-ASCII runes, assuming input is valid UTF-8 126 if b >= 128 { 127 if highRun < 3 { 128 highRun++ 129 } else { 130 highRun = 0 131 res.runes++ 132 width++ 133 } 134 135 prev2 = prev1 136 prev1 = b 137 continue 138 } 139 140 // handle line-feeds 141 if b == '\n' { 142 res.lines++ 143 144 crlf := count(prev1, '\r') 145 res.crlf += crlf 146 147 // count lines with trailing spaces, whether these end with 148 // a CRLF byte-pair or just a line-feed byte 149 res.trailing += count(prev1, ' ') 150 res.trailing += crlf & count(prev2, ' ') 151 152 // exclude any CR from the current line's width-count 153 width -= crlf 154 if res.maxWidth < width { 155 res.maxWidth = width 156 } 157 158 prev2 = prev1 159 prev1 = b 160 161 res.runes++ 162 highRun = 0 163 width = 0 164 continue 165 } 166 167 prev2 = prev1 168 prev1 = b 169 170 res.runes++ 171 highRun = 0 172 width++ 173 } 174 } 175 } 176 177 // handleEnd fixes/finalizes stats when input data end; this func is only 178 // meant to be used by func updateStats, since it takes some of the latter's 179 // local variables 180 func (res *stats) handleEnd(width counter, prev1 byte, highRun int) error { 181 if prev1 == ' ' { 182 res.trailing++ 183 } 184 185 if res.maxWidth < width { 186 res.maxWidth = width 187 } 188 189 // avoid reporting 0 lines with a non-0 byte-count: this is unlike the 190 // standard cmd-line tool `wc` 191 if res.bytes > 0 && prev1 != '\n' { 192 res.lines++ 193 } 194 195 if highRun > 0 { 196 res.runes++ 197 } 198 return nil 199 } 200 201 // count checks if 2 bytes are the same, returning either 0 or 1, which can 202 // be added directly/branchlessly to totals 203 func count(x, y byte) counter { 204 return counter(isZero[x^y]) 205 } 206 207 // countLeadingReady finds how many items are ready to show at the start of a 208 // results-slice, which ensures output matches the original item-order 209 func countLeadingReady(values []stats) int { 210 for i, v := range values { 211 if v.result == resultPending { 212 return i 213 } 214 } 215 return len(values) 216 } File: coby/stats_test.go 1 package main 2 3 import ( 4 "strings" 5 "testing" 6 ) 7 8 func TestCount(t *testing.T) { 9 for x := 0; x < 256; x++ { 10 for y := 0; y < 256; y++ { 11 var exp counter 12 if x == y { 13 exp = 1 14 } 15 16 if got := count(byte(x), byte(y)); got != exp { 17 t.Fatalf(`%d, %d: expected %v, but got %v`, x, y, exp, got) 18 return 19 } 20 } 21 } 22 } 23 24 func TestCountLeadingReady(t *testing.T) { 25 for size := 0; size <= 20; size++ { 26 for exp := 0; exp < size; exp++ { 27 values := make([]stats, size) 28 for i := 0; i < exp; i++ { 29 v := resultSuccess 30 if i%2 == 1 { 31 v = resultError 32 } 33 values[i].result = v 34 } 35 36 if got := countLeadingReady(values); got != exp { 37 const fs = `size %d: expected %d, instead of %d` 38 t.Fatalf(fs, size, exp, got) 39 } 40 } 41 } 42 } 43 44 func TestStats(t *testing.T) { 45 var tests = []struct { 46 Input string 47 Expected stats 48 }{ 49 { 50 ``, 51 stats{}, 52 }, 53 { 54 `abc`, 55 stats{lines: 1, runes: 3, maxWidth: 3}, 56 }, 57 { 58 "abc\tdef\r\n", 59 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 60 }, 61 { 62 "abc\tdef\r\n", 63 stats{lines: 1, runes: 9, maxWidth: 7, tabs: 1, lf: 1, crlf: 1}, 64 }, 65 { 66 "abc\tdef \r\n123\t456 789 ", 67 stats{ 68 lines: 2, runes: 23, maxWidth: 13, 69 spaces: 4, tabs: 2, trailing: 2, lf: 1, crlf: 1, 70 }, 71 }, 72 } 73 74 for _, tc := range tests { 75 t.Run(tc.Input, func(t *testing.T) { 76 var got stats 77 err := got.updateStats(strings.NewReader(tc.Input)) 78 if err != nil { 79 t.Error(err) 80 return 81 } 82 83 tc.Expected.bytes = len(tc.Input) 84 tc.Expected.result = resultSuccess 85 if got != tc.Expected { 86 t.Fatalf("expected\n%#v,\ngot\n%#v", tc.Expected, got) 87 return 88 } 89 }) 90 } 91 }