File: shame512.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath shame512.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "crypto/sha512" 37 "encoding/hex" 38 "errors" 39 "io" 40 "io/fs" 41 "os" 42 "path/filepath" 43 "runtime" 44 "sort" 45 "sync" 46 ) 47 48 const info = ` 49 shame512 [options...] [files/folders...] 50 51 Group files by common/same SHA-512 hashes: having the same hash means a high 52 chance of having the same contents, even though that's not a guarantee. 53 54 Files and folder names can be given together for convenience: folder names 55 are searched recursively to find all files, including in any subfolders. 56 57 The only option available is to show this help message, using any of 58 "-h", "--h", "-help", or "--help", without the quotes. 59 ` 60 61 func main() { 62 args := os.Args[1:] 63 64 if len(args) > 0 { 65 switch args[0] { 66 case `-h`, `--h`, `-help`, `--help`: 67 os.Stdout.WriteString(info[1:]) 68 return 69 70 case `--`: 71 args = args[1:] 72 } 73 } 74 75 // paths has all filepaths given, ignoring repetitions 76 paths, ok := findAllFiles(args) 77 if !ok { 78 os.Exit(1) 79 } 80 if len(paths) == 0 { 81 paths = []string{`.`} 82 } 83 84 checksums := make([]string, len(paths)) 85 findChecksums(paths, checksums) 86 87 // group filepaths by common checksum, remembering the index of the 88 // first item for each group, so the groups can be sorted by the 89 // original order their first filepath came from the command-line 90 indices := make(map[string]int) 91 groups := make(map[string][]string) 92 for i, chsum := range checksums { 93 if chsum == `` { 94 continue 95 } 96 g, ok := groups[chsum] 97 if !ok { 98 indices[chsum] = i 99 } 100 groups[chsum] = append(g, paths[i]) 101 } 102 103 // keys has the sorted checksums 104 keys := make([]string, len(groups)) 105 for chsum := range groups { 106 keys = append(keys, chsum) 107 } 108 sort.SliceStable(keys, func(i, j int) bool { 109 x := indices[keys[i]] 110 y := indices[keys[j]] 111 return x < y 112 }) 113 114 shown := 0 115 bw := bufio.NewWriter(os.Stdout) 116 defer bw.Flush() 117 118 for _, chsum := range keys { 119 if shown > 0 { 120 bw.WriteByte('\n') 121 } 122 123 bw.WriteString(chsum) 124 bw.WriteByte('\n') 125 126 files := groups[chsum] 127 for _, path := range files { 128 bw.WriteString(path) 129 bw.WriteByte('\n') 130 } 131 132 shown++ 133 } 134 } 135 136 // findAllFiles can be given a mix of file/folder paths, finding all files 137 // recursively in folders, avoiding duplicates 138 func findAllFiles(paths []string) (files []string, success bool) { 139 walk := filepath.WalkDir 140 got := make(map[string]struct{}) 141 success = true 142 143 for _, path := range paths { 144 if _, ok := got[path]; ok { 145 continue 146 } 147 got[path] = struct{}{} 148 149 // a dash means standard input 150 if path == `-` { 151 files = append(files, path) 152 continue 153 } 154 155 info, err := os.Stat(path) 156 if os.IsNotExist(err) { 157 // on windows, file-not-found messages may mention `CreateFile`, 158 // even when trying to open files in read-only mode 159 err = errors.New(`can't find file/folder named ` + path) 160 } 161 162 if err != nil { 163 showError(path, err) 164 success = false 165 continue 166 } 167 168 if !info.IsDir() { 169 files = append(files, path) 170 continue 171 } 172 173 err = walk(path, func(path string, info fs.DirEntry, err error) error { 174 path, err = filepath.Abs(path) 175 if err != nil { 176 showError(path, err) 177 success = false 178 return err 179 } 180 181 if _, ok := got[path]; ok { 182 if info.IsDir() { 183 return fs.SkipDir 184 } 185 return nil 186 } 187 got[path] = struct{}{} 188 189 if info.IsDir() { 190 return nil 191 } 192 193 files = append(files, path) 194 return nil 195 }) 196 197 if err != nil { 198 showError(path, err) 199 success = false 200 } 201 } 202 203 return files, success 204 } 205 206 func showError(path string, err error) { 207 if path != `` { 208 os.Stderr.WriteString(path) 209 os.Stderr.WriteString(`: `) 210 } 211 os.Stderr.WriteString(err.Error()) 212 os.Stderr.WriteString("\n") 213 } 214 215 func findChecksums(paths []string, sums []string) { 216 var tasks sync.WaitGroup 217 // the number of tasks is always known in advance 218 tasks.Add(len(paths)) 219 220 // permissions is buffered to limit concurrency to the core-count 221 permissions := make(chan struct{}, runtime.NumCPU()) 222 defer close(permissions) 223 224 for i, path := range paths { 225 // wait until some concurrency-room is available, before proceeding 226 permissions <- struct{}{} 227 228 go func(i int, path string) { 229 defer tasks.Done() 230 231 chsum, err := sha(path) 232 if err != nil { 233 chsum = `` 234 showError(path, err) 235 } 236 237 sums[i] = chsum 238 <-permissions 239 }(i, path) 240 } 241 242 // wait for all tasks to finish 243 tasks.Wait() 244 } 245 246 // sha calculates a checksum for a file's contents 247 func sha(path string) (string, error) { 248 f, err := os.Open(path) 249 if err != nil { 250 return ``, err 251 } 252 defer f.Close() 253 254 sha := sha512.New() 255 _, err = io.Copy(sha, f) 256 if err != nil { 257 return ``, err 258 } 259 260 // buf has room to fit a SHA-512 hash exactly: while its hexadecimal-ASCII 261 // rendition is 128 bytes, the checksum itself is 64 bytes 262 var buf [64]byte 263 return hex.EncodeToString(sha.Sum(buf[:0])), err 264 }