/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

    go build -ldflags "-s -w" -trimpath shame512.go
*/

package main

import (
	"bufio"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"sync"
)

// info is the help message: it starts with a line-feed for readability of
// the source, so it's meant to be shown from its second byte onward
const info = `
shame512 [options...] [files/folders...]

Group files by common/same SHA-512 hashes: having the same hash means a high
chance of having the same contents, even though that's not a guarantee.

Files and folder names can be given together for convenience: folder names
are searched recursively to find all files, including in any subfolders.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`

// main handles the command-line arguments, finds all files to check, and
// shows each checksum followed by all the filepaths sharing it, separating
// groups with an empty line
func main() {
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `--`:
			args = args[1:]
		}
	}

	// default to the current folder when no names are given: this must
	// happen before searching, since a folder can't be checksummed itself
	if len(args) == 0 {
		args = []string{`.`}
	}

	// paths has all filepaths found, ignoring repetitions
	paths, ok := findAllFiles(args)
	if !ok {
		os.Exit(1)
	}

	checksums := make([]string, len(paths))
	findChecksums(paths, checksums)

	keys, groups := groupByChecksum(paths, checksums)

	bw := bufio.NewWriter(os.Stdout)
	defer bw.Flush()

	for i, chsum := range keys {
		if i > 0 {
			bw.WriteByte('\n')
		}

		bw.WriteString(chsum)
		bw.WriteByte('\n')

		for _, path := range groups[chsum] {
			bw.WriteString(path)
			bw.WriteByte('\n')
		}
	}
}

// groupByChecksum groups filepaths by their checksums: paths and checksums
// are matched by index, and empty checksums (failed files) are ignored. The
// keys returned are the distinct checksums, ordered by the position of the
// first filepath in each group, so results follow the original input order.
func groupByChecksum(paths, checksums []string) (keys []string, groups map[string][]string) {
	// first remembers the index of the first filepath for each checksum
	first := make(map[string]int)
	groups = make(map[string][]string)

	for i, chsum := range checksums {
		if chsum == `` {
			continue
		}
		if _, ok := groups[chsum]; !ok {
			first[chsum] = i
		}
		groups[chsum] = append(groups[chsum], paths[i])
	}

	// keys starts empty, with room for all checksums: starting it with a
	// non-zero length would leave empty strings before the appended items
	keys = make([]string, 0, len(groups))
	for chsum := range groups {
		keys = append(keys, chsum)
	}
	sort.SliceStable(keys, func(i, j int) bool {
		return first[keys[i]] < first[keys[j]]
	})

	return keys, groups
}

// findAllFiles can be given a mix of file/folder paths, finding all files
// recursively in folders, avoiding duplicates. A single dash stands for the
// standard input, and is kept as is. Errors are shown as they're found, and
// make the success result false, without stopping the search.
func findAllFiles(paths []string) (files []string, success bool) {
	got := make(map[string]struct{})
	success = true

	for _, root := range paths {
		if _, ok := got[root]; ok {
			continue
		}
		got[root] = struct{}{}

		// a dash means standard input
		if root == `-` {
			files = append(files, root)
			continue
		}

		st, err := os.Stat(root)
		if os.IsNotExist(err) {
			// on windows, file-not-found messages may mention `CreateFile`,
			// even when trying to open files in read-only mode
			err = errors.New(`can't find file/folder named ` + root)
		}

		if err != nil {
			showError(root, err)
			success = false
			continue
		}

		if !st.IsDir() {
			files = append(files, root)
			continue
		}

		err = filepath.WalkDir(root, func(p string, d fs.DirEntry, err error) error {
			// errors must be checked first: the entry may be nil when there's
			// an error, and failed folder-reads are reported via a second
			// call using a path which was already seen
			if err != nil {
				showError(p, err)
				success = false
				// keep looking for files in what's left of the folder
				return nil
			}

			// the first call is for the top-level folder itself, which was
			// already marked as seen before starting the search: checking
			// it for repetitions would wrongly skip the whole folder
			if p != root {
				if _, ok := got[p]; ok {
					if d.IsDir() {
						return fs.SkipDir
					}
					return nil
				}
				got[p] = struct{}{}
			}

			if !d.IsDir() {
				files = append(files, p)
			}
			return nil
		})

		if err != nil {
			showError(root, err)
			success = false
		}
	}

	return files, success
}

// showError writes an error message as a line on the standard error, which
// starts with the path given, unless that's empty
func showError(path string, err error) {
	if path != `` {
		os.Stderr.WriteString(path)
		os.Stderr.WriteString(`: `)
	}
	os.Stderr.WriteString(err.Error())
	os.Stderr.WriteString("\n")
}

// findChecksums concurrently calculates the checksums for all paths given,
// putting results in the slice given at matching indices, which therefore
// needs at least as many items as there are paths. Files which fail have
// their errors shown, and get empty strings as their results.
func findChecksums(paths []string, sums []string) {
	var tasks sync.WaitGroup
	// the number of tasks is always known in advance
	tasks.Add(len(paths))

	// permissions is buffered to limit concurrency to the core-count
	permissions := make(chan struct{}, runtime.NumCPU())

	for i, path := range paths {
		// wait until some concurrency-room is available, before proceeding
		permissions <- struct{}{}

		go func(i int, path string) {
			defer tasks.Done()
			// make room for another task, no matter how this one ends
			defer func() { <-permissions }()

			chsum, err := sha(path)
			if err != nil {
				chsum = ``
				showError(path, err)
			}

			// each task only updates its own item, so no locking is needed
			sums[i] = chsum
		}(i, path)
	}

	// wait for all tasks to finish
	tasks.Wait()
}

// sha calculates a checksum for a file's contents, giving the SHA-512 hash
// as a lowercase hexadecimal string; a single dash as the path means using
// the standard input, instead of a named file
func sha(path string) (string, error) {
	var src io.Reader = os.Stdin

	if path != `-` {
		f, err := os.Open(path)
		if err != nil {
			return ``, err
		}
		defer f.Close()
		src = f
	}

	hasher := sha512.New()
	if _, err := io.Copy(hasher, src); err != nil {
		return ``, err
	}

	// buf has room to fit a SHA-512 hash exactly: while its hexadecimal-ASCII
	// rendition is 128 bytes, the checksum itself is 64 bytes
	var buf [sha512.Size]byte
	return hex.EncodeToString(hasher.Sum(buf[:0])), nil
}