// File: shame512.go

/*
The MIT License (MIT)

Copyright © 2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath shame512.go
*/

package main

import (
	"bufio"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sync"
)

const info = `
shame512 [options...] [files/folders...]

Group files by common/same SHA-512 hashes: having the same hash means a high
chance of having the same contents, even though that's not a guarantee.

Files and folder names can be given together for convenience: folder names
are searched recursively to find all files, including in any subfolders.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`

// main handles the help option, finds all files from the paths given,
// checksums them concurrently, and shows each checksum followed by the
// paths of all files sharing it
func main() {
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			// skip the leading line-feed of the help message
			os.Stdout.WriteString(info[1:])
			return

		case `--`:
			args = args[1:]
		}
	}

	// default to the current folder: this must happen before looking for
	// files, so the folder is searched recursively instead of being handed
	// to the checksum function as if it were a file
	if len(args) == 0 {
		args = []string{`.`}
	}

	// paths has all filepaths given, ignoring repetitions
	paths, ok := findAllFiles(deduplicate(args))
	if !ok {
		os.Exit(1)
	}

	checksums := make([]string, len(paths))
	findChecksums(paths, checksums)

	order, groups := groupByChecksum(paths, checksums)

	bw := bufio.NewWriter(os.Stdout)
	defer bw.Flush()

	for i, chsum := range order {
		// an empty line separates groups
		if i > 0 {
			bw.WriteByte('\n')
		}

		bw.WriteString(chsum)
		bw.WriteByte('\n')

		for _, path := range groups[chsum] {
			bw.WriteString(path)
			bw.WriteByte('\n')
		}
	}
}

// groupByChecksum groups paths by their checksums: sums[i] is the checksum
// for paths[i], and empty checksums (failed files) are ignored; the order
// returned has each distinct checksum once, in order of first appearance,
// so results can be shown deterministically, unlike when looping over a map
func groupByChecksum(paths []string, sums []string) (order []string, groups map[string][]string) {
	groups = make(map[string][]string)

	for i, chsum := range sums {
		if chsum == `` {
			continue
		}

		if _, seen := groups[chsum]; !seen {
			order = append(order, chsum)
		}
		groups[chsum] = append(groups[chsum], paths[i])
	}

	return order, groups
}

// findAllFiles can be given a mix of file/folder paths, finding all files
// recursively in folders, avoiding duplicates; all errors are shown on
// standard error, and the ok result is false when any error happened
func findAllFiles(paths []string) (found []string, ok bool) {
	ok = true
	seen := make(map[string]struct{})

	// add remembers a filepath, unless it was already found
	add := func(path string) {
		if _, dup := seen[path]; dup {
			return
		}
		seen[path] = struct{}{}
		found = append(found, path)
	}

	// fail shows an error and marks the whole search as failed
	fail := func(err error) {
		showError(``, err)
		ok = false
	}

	// visit is the folder-walking callback: errors are returned, which
	// stops the current walk, and are shown only once by the walk's caller
	visit := func(path string, entry fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if !entry.IsDir() {
			add(path)
		}
		return nil
	}

	for _, s := range paths {
		// a dash means standard input
		if s == `-` {
			add(s)
			continue
		}

		st, err := os.Stat(s)
		if os.IsNotExist(err) {
			// on windows, file-not-found messages may mention `CreateFile`,
			// even when trying to open files in read-only mode
			fail(errors.New(`can't find file/folder named ` + s))
			continue
		}

		if err != nil {
			fail(err)
			continue
		}

		if !st.IsDir() {
			add(s)
			continue
		}

		if err := filepath.WalkDir(s, visit); err != nil {
			fail(err)
		}
	}

	return found, ok
}

// showError writes a line with the error given on standard error, starting
// it with the path given, unless that's empty; the whole line is emitted
// with a single write, so lines from concurrent tasks don't mix together
func showError(path string, err error) {
	msg := err.Error() + "\n"
	if path != `` {
		msg = path + `: ` + msg
	}
	os.Stderr.WriteString(msg)
}

// deduplicate returns the values given without repetitions, keeping the
// order of their first appearances
func deduplicate(values []string) []string {
	seen := make(map[string]struct{}, len(values))
	unique := make([]string, 0, len(values))

	for _, s := range values {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}

	return unique
}

// findChecksums calculates the checksum for each path given, storing the
// result for paths[i] in sums[i], which means sums must be at least as long
// as paths; files which fail have their errors shown and get empty strings
// as their results; this func returns only after all its tasks are done
func findChecksums(paths []string, sums []string) {
	// permissions limits how many worker tasks can be active at the same
	// time: when given many filepaths to work on, rate-limiting avoids
	// a massive number of concurrent tasks which read and process input
	permissions := make(chan struct{}, runtime.NumCPU())

	// pending ensures all tasks are done before returning: without it, the
	// last few results could be missing when the caller looks at them
	var pending sync.WaitGroup

	for i := range paths {
		// wait until some concurrency-room is available
		permissions <- struct{}{}
		pending.Add(1)

		go func(i int) {
			defer func() {
				<-permissions
				pending.Done()
			}()

			chsum, err := sha(paths[i])
			if err != nil {
				chsum = ``
				showError(paths[i], err)
			}

			// each task writes only to its own slot, so no lock is needed
			sums[i] = chsum
		}(i)
	}

	pending.Wait()
}

// sha calculates the SHA-512 checksum for a file's contents, as a lowercase
// hexadecimal string; a single dash as the path means standard input
func sha(path string) (string, error) {
	var src io.Reader = os.Stdin

	if path != `-` {
		f, err := os.Open(path)
		if err != nil {
			return ``, err
		}
		defer f.Close()
		src = f
	}

	h := sha512.New()
	if _, err := io.Copy(h, src); err != nil {
		return ``, err
	}

	return hex.EncodeToString(h.Sum(nil)), nil
}