File: shame512.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath shame512.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "crypto/sha512"
  37     "encoding/hex"
  38     "errors"
  39     "io"
  40     "io/fs"
  41     "os"
  42     "path/filepath"
  43     "runtime"
  44     "sort"
  45     "sync"
  46 )
  47 
// info is the help message shown by any of the help options; it starts with
// a newline to keep its text left-aligned in the source, so main skips its
// first byte when showing it
const info = `
shame512 [options...] [files/folders...]

Group files by common/same SHA-512 hashes: having the same hash means a high
chance of having the same contents, even though that's not a guarantee.

Files and folder names can be given together for convenience: folder names
are searched recursively to find all files, including in any subfolders.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`
  60 
  61 func main() {
  62     args := os.Args[1:]
  63 
  64     if len(args) > 0 {
  65         switch args[0] {
  66         case `-h`, `--h`, `-help`, `--help`:
  67             os.Stdout.WriteString(info[1:])
  68             return
  69 
  70         case `--`:
  71             args = args[1:]
  72         }
  73     }
  74 
  75     // paths has all filepaths given, ignoring repetitions
  76     paths, ok := findAllFiles(args)
  77     if !ok {
  78         os.Exit(1)
  79     }
  80     if len(paths) == 0 {
  81         paths = []string{`.`}
  82     }
  83 
  84     checksums := make([]string, len(paths))
  85     findChecksums(paths, checksums)
  86 
  87     // group filepaths by common checksum, remembering the index of the
  88     // first item for each group, so the groups can be sorted by the
  89     // original order their first filepath came from the command-line
  90     indices := make(map[string]int)
  91     groups := make(map[string][]string)
  92     for i, chsum := range checksums {
  93         if chsum == `` {
  94             continue
  95         }
  96         g, ok := groups[chsum]
  97         if !ok {
  98             indices[chsum] = i
  99         }
 100         groups[chsum] = append(g, paths[i])
 101     }
 102 
 103     // keys has the sorted checksums
 104     keys := make([]string, len(groups))
 105     for chsum := range groups {
 106         keys = append(keys, chsum)
 107     }
 108     sort.SliceStable(keys, func(i, j int) bool {
 109         x := indices[keys[i]]
 110         y := indices[keys[j]]
 111         return x < y
 112     })
 113 
 114     shown := 0
 115     bw := bufio.NewWriter(os.Stdout)
 116     defer bw.Flush()
 117 
 118     for _, chsum := range keys {
 119         if shown > 0 {
 120             bw.WriteByte('\n')
 121         }
 122 
 123         bw.WriteString(chsum)
 124         bw.WriteByte('\n')
 125 
 126         files := groups[chsum]
 127         for _, path := range files {
 128             bw.WriteString(path)
 129             bw.WriteByte('\n')
 130         }
 131 
 132         shown++
 133     }
 134 }
 135 
 136 // findAllFiles can be given a mix of file/folder paths, finding all files
 137 // recursively in folders, avoiding duplicates
 138 func findAllFiles(paths []string) (files []string, success bool) {
 139     rec := filepath.WalkDir
 140     got := make(map[string]struct{})
 141     success = true
 142 
 143     for _, path := range paths {
 144         if _, ok := got[path]; ok {
 145             continue
 146         }
 147         got[path] = struct{}{}
 148 
 149         // a dash means standard input
 150         if path == `-` {
 151             files = append(files, path)
 152             continue
 153         }
 154 
 155         info, err := os.Stat(path)
 156         if os.IsNotExist(err) {
 157             // on windows, file-not-found messages may mention `CreateFile`,
 158             // even when trying to open files in read-only mode
 159             err = errors.New(`can't find file/folder named ` + path)
 160         }
 161 
 162         if err != nil {
 163             showError(path, err)
 164             success = false
 165             continue
 166         }
 167 
 168         if !info.IsDir() {
 169             files = append(files, path)
 170             continue
 171         }
 172 
 173         err = rec(path, func(path string, info fs.DirEntry, err error) error {
 174             if _, ok := got[path]; ok {
 175                 if info.IsDir() {
 176                     return fs.SkipDir
 177                 }
 178                 return nil
 179             }
 180             got[path] = struct{}{}
 181 
 182             if err != nil {
 183                 showError(path, err)
 184                 success = false
 185                 return err
 186             }
 187 
 188             if info.IsDir() {
 189                 return nil
 190             }
 191 
 192             files = append(files, path)
 193             return nil
 194         })
 195 
 196         if err != nil {
 197             showError(path, err)
 198             success = false
 199         }
 200     }
 201 
 202     return files, success
 203 }
 204 
 205 func showError(path string, err error) {
 206     if path != `` {
 207         os.Stderr.WriteString(path)
 208         os.Stderr.WriteString(`: `)
 209     }
 210     os.Stderr.WriteString(err.Error())
 211     os.Stderr.WriteString("\n")
 212 }
 213 
 214 func findChecksums(paths []string, sums []string) {
 215     var tasks sync.WaitGroup
 216     // the number of tasks is always known in advance
 217     tasks.Add(len(paths))
 218 
 219     // permissions is buffered to limit concurrency to the core-count
 220     permissions := make(chan struct{}, runtime.NumCPU())
 221     defer close(permissions)
 222 
 223     for i, path := range paths {
 224         // wait until some concurrency-room is available, before proceeding
 225         permissions <- struct{}{}
 226 
 227         go func(i int, path string) {
 228             defer tasks.Done()
 229 
 230             chsum, err := sha(path)
 231             if err != nil {
 232                 chsum = ``
 233                 showError(path, err)
 234             }
 235 
 236             sums[i] = chsum
 237             <-permissions
 238         }(i, path)
 239     }
 240 
 241     // wait for all tasks to finish
 242     tasks.Wait()
 243 }
 244 
 245 // sha calculates a checksum for a file's contents
 246 func sha(path string) (string, error) {
 247     f, err := os.Open(path)
 248     if err != nil {
 249         return ``, err
 250     }
 251     defer f.Close()
 252 
 253     sha := sha512.New()
 254     _, err = io.Copy(sha, f)
 255     if err != nil {
 256         return ``, err
 257     }
 258 
 259     // buf has room to fit a SHA-512 hash exactly: while its hexadecimal-ASCII
 260     // rendition is 128 bytes, the checksum itself is 64 bytes
 261     var buf [64]byte
 262     return hex.EncodeToString(sha.Sum(buf[:0])), err
 263 }