/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath shame512.go
*/

package main

import (
	"bufio"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"sync"
)

// info is the help message; its leading newline is there only to keep the
// source tidy, and is skipped when the message is shown
const info = `
shame512 [options...] [files/folders...]

Group files by common/same SHA-512 hashes: having the same hash means a high
chance of having the same contents, even though that's not a guarantee.

Files and folder names can be given together for convenience: folder names
are searched recursively to find all files, including in any subfolders.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`

// main handles the help option, finds all files from the names given,
// calculates their checksums, and shows the files grouped by checksum
func main() {
	args := os.Args[1:]

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `--`:
			args = args[1:]
		}
	}

	// without any names given, search the current folder: this default must
	// be set before looking for files, so the folder is actually searched,
	// instead of being mistaken for a file to checksum
	if len(args) == 0 {
		args = []string{`.`}
	}

	// paths has all filepaths found, ignoring repetitions
	paths, ok := findAllFiles(args)
	if !ok {
		os.Exit(1)
	}

	checksums := make([]string, len(paths))
	findChecksums(paths, checksums)

	keys, groups := groupByChecksum(paths, checksums)

	bw := bufio.NewWriter(os.Stdout)
	defer bw.Flush()

	for i, chsum := range keys {
		// separate groups with an empty line
		if i > 0 {
			bw.WriteByte('\n')
		}

		bw.WriteString(chsum)
		bw.WriteByte('\n')

		for _, path := range groups[chsum] {
			bw.WriteString(path)
			bw.WriteByte('\n')
		}
	}
}

// groupByChecksum groups filepaths by their common checksum: paths and
// checksums are matched by index, and empty checksums (failed files) are
// ignored. The keys returned are the distinct checksums, sorted by the
// position of the first filepath each of them came from.
func groupByChecksum(paths, checksums []string) (keys []string, groups map[string][]string) {
	// indices remembers where the first item for each group came from
	indices := make(map[string]int)
	groups = make(map[string][]string)

	for i, chsum := range checksums {
		if chsum == `` {
			continue
		}

		if _, ok := groups[chsum]; !ok {
			indices[chsum] = i
		}
		groups[chsum] = append(groups[chsum], paths[i])
	}

	// start from length 0: giving `make` a non-zero length here would keep
	// that many empty keys before the appended checksums
	keys = make([]string, 0, len(groups))
	for chsum := range groups {
		keys = append(keys, chsum)
	}

	sort.SliceStable(keys, func(i, j int) bool {
		return indices[keys[i]] < indices[keys[j]]
	})
	return keys, groups
}

// findAllFiles can be given a mix of file/folder paths, finding all files
// recursively in folders, avoiding duplicates; a dash is kept as is, since
// it stands for standard input. Errors are shown as they happen, and make
// the success result false, without stopping the search.
func findAllFiles(paths []string) (files []string, success bool) {
	// got has all paths handled so far, whether given or found
	got := make(map[string]struct{})
	success = true

	for _, path := range paths {
		if _, ok := got[path]; ok {
			continue
		}
		got[path] = struct{}{}

		// a dash means standard input
		if path == `-` {
			files = append(files, path)
			continue
		}

		info, err := os.Stat(path)
		if errors.Is(err, fs.ErrNotExist) {
			// on windows, file-not-found messages may mention `CreateFile`,
			// even when trying to open files in read-only mode
			err = errors.New(`can't find file/folder named ` + path)
		}

		if err != nil {
			showError(path, err)
			success = false
			continue
		}

		if !info.IsDir() {
			files = append(files, path)
			continue
		}

		err = filepath.WalkDir(path, func(name string, entry fs.DirEntry, err error) error {
			// check errors first, since the entry may be nil when the error
			// isn't; showing the error here and carrying on lets the rest
			// of the folder still be searched
			if err != nil {
				showError(name, err)
				success = false
				return nil
			}

			// the folder being searched is always the first entry visited,
			// and was already marked by the loop above: it must not be
			// mistaken for a repeat, or nothing in it would ever be found
			if name != path {
				if _, ok := got[name]; ok {
					if entry.IsDir() {
						return fs.SkipDir
					}
					return nil
				}
				got[name] = struct{}{}
			}

			if !entry.IsDir() {
				files = append(files, name)
			}
			return nil
		})

		if err != nil {
			showError(path, err)
			success = false
		}
	}

	return files, success
}

// showError writes a line to standard error with the message from the error
// given, starting with the path given, unless that's empty. The whole line
// is emitted using a single write, so that lines coming from concurrent
// tasks don't get mixed piece by piece.
func showError(path string, err error) {
	msg := err.Error() + "\n"
	if path != `` {
		msg = path + `: ` + msg
	}
	os.Stderr.WriteString(msg)
}

// findChecksums concurrently calculates the checksums for all files given,
// putting each result in the slot matching the index of its filepath: sums
// must have at least as many items as paths. Failures are shown as errors,
// and result in empty checksums.
func findChecksums(paths []string, sums []string) {
	var tasks sync.WaitGroup
	// the number of tasks is always known in advance
	tasks.Add(len(paths))

	// permissions is buffered to limit concurrency to the core-count
	permissions := make(chan struct{}, runtime.NumCPU())

	for i, path := range paths {
		// wait until some concurrency-room is available, before proceeding
		permissions <- struct{}{}

		go func(i int, path string) {
			defer tasks.Done()
			// always give the concurrency-room back, however the task ends
			defer func() { <-permissions }()

			chsum, err := sha(path)
			if err != nil {
				showError(path, err)
				chsum = ``
			}
			sums[i] = chsum
		}(i, path)
	}

	// wait for all tasks to finish
	tasks.Wait()
}

// sha calculates the SHA-512 checksum for a file's contents, as a lowercase
// hexadecimal string; a dash for the path means standard input is read,
// instead of a file. The checksum is empty when the error isn't nil.
func sha(path string) (string, error) {
	var src io.Reader = os.Stdin

	if path != `-` {
		f, err := os.Open(path)
		if err != nil {
			return ``, err
		}
		defer f.Close()
		src = f
	}

	h := sha512.New()
	if _, err := io.Copy(h, src); err != nil {
		return ``, err
	}

	// buf has room to fit a SHA-512 hash exactly: while its hexadecimal-ASCII
	// rendition is 128 bytes, the checksum itself is 64 bytes
	var buf [sha512.Size]byte
	return hex.EncodeToString(h.Sum(buf[:0])), nil
}