File: shame512.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath shame512.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "crypto/sha512"
  37     "encoding/hex"
  38     "errors"
  39     "io"
  40     "io/fs"
  41     "os"
  42     "path/filepath"
  43     "runtime"
  44     "sync"
  45 )
  46 
// info is the help message shown for any of the help options; the raw
// string starts with a line-feed, which main skips by showing info[1:]
const info = `
shame512 [options...] [files/folders...]

Group files by common/same SHA-512 hashes: having the same hash means a high
chance of having the same contents, even though that's not a guarantee.

Files and folder names can be given together for convenience: folder names
are searched recursively to find all files, including in any subfolders.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`
  59 
  60 func main() {
  61     args := os.Args[1:]
  62 
  63     if len(args) > 0 {
  64         switch args[0] {
  65         case `-h`, `--h`, `-help`, `--help`:
  66             os.Stdout.WriteString(info[1:])
  67             return
  68 
  69         case `--`:
  70             args = args[1:]
  71         }
  72     }
  73 
  74     // paths has all filepaths given, ignoring repetitions
  75     paths, ok := findAllFiles(deduplicate(args))
  76     if !ok {
  77         os.Exit(1)
  78     }
  79     if len(paths) == 0 {
  80         paths = []string{`.`}
  81     }
  82 
  83     checksums := make([]string, len(paths))
  84     findChecksums(paths, checksums)
  85 
  86     groups := make(map[string][]string)
  87     for i, chsum := range checksums {
  88         if chsum == `` {
  89             continue
  90         }
  91         groups[chsum] = append(groups[chsum], paths[i])
  92     }
  93 
  94     shown := 0
  95     bw := bufio.NewWriter(os.Stdout)
  96     defer bw.Flush()
  97 
  98     for chsum, files := range groups {
  99         if shown > 0 {
 100             bw.WriteByte('\n')
 101         }
 102 
 103         bw.WriteString(chsum)
 104         bw.WriteByte('\n')
 105 
 106         for _, path := range files {
 107             bw.WriteString(path)
 108             bw.WriteByte('\n')
 109         }
 110 
 111         shown++
 112     }
 113 }
 114 
 115 // findAllFiles can be given a mix of file/folder paths, finding all files
 116 // recursively in folders, avoiding duplicates
 117 func findAllFiles(paths []string) (found []string, ok bool) {
 118     res := make(chan any)
 119     var all sync.WaitGroup
 120     all.Add(1)
 121 
 122     go func() {
 123         defer all.Done()
 124         got := make(map[string]struct{})
 125         ok = true
 126 
 127         for v := range res {
 128             if err, ok := v.(error); ok {
 129                 showError(``, err)
 130                 ok = false
 131                 continue
 132             }
 133 
 134             s, ok := v.(string)
 135             if !ok {
 136                 showError(``, errors.New(`value is neither string nor error`))
 137                 ok = false
 138                 continue
 139             }
 140 
 141             if _, ok := got[s]; ok {
 142                 continue
 143             }
 144 
 145             got[s] = struct{}{}
 146             found = append(found, s)
 147         }
 148     }()
 149 
 150     rec := func(path string, info fs.DirEntry, err error) error {
 151         if err != nil {
 152             res <- err
 153             return err
 154         }
 155 
 156         if info.IsDir() {
 157             return nil
 158         }
 159 
 160         res <- path
 161         return nil
 162     }
 163 
 164     for _, s := range paths {
 165         // a dash means standard input
 166         if s == `-` {
 167             res <- s
 168             continue
 169         }
 170 
 171         info, err := os.Stat(s)
 172         if os.IsNotExist(err) {
 173             // on windows, file-not-found messages may mention `CreateFile`,
 174             // even when trying to open files in read-only mode
 175             res <- errors.New(`can't find file/folder named ` + s)
 176             continue
 177         }
 178 
 179         if err != nil {
 180             res <- err
 181             continue
 182         }
 183 
 184         if !info.IsDir() {
 185             res <- s
 186             continue
 187         }
 188 
 189         if err := filepath.WalkDir(s, rec); err != nil {
 190             res <- err
 191         }
 192     }
 193 
 194     close(res)
 195     all.Wait()
 196 
 197     return found, ok
 198 }
 199 
 200 func showError(path string, err error) {
 201     if path != `` {
 202         os.Stderr.WriteString(path)
 203         os.Stderr.WriteString(`: `)
 204     }
 205     os.Stderr.WriteString(err.Error())
 206     os.Stderr.WriteString("\n")
 207 }
 208 
 209 func deduplicate(values []string) []string {
 210     got := make(map[string]struct{})
 211     unique := make([]string, 0, len(values))
 212 
 213     for _, s := range values {
 214         if _, ok := got[s]; ok {
 215             continue
 216         }
 217         got[s] = struct{}{}
 218         unique = append(unique, s)
 219     }
 220 
 221     return unique
 222 }
 223 
 224 func findChecksums(paths []string, sums []string) {
 225     // permissions limits how many worker tasks can be active at the same
 226     // time: when given many filepaths to work on, rate-limiting avoids
 227     // a massive number of concurrent tasks which read and process input
 228     permissions := make(chan struct{}, runtime.NumCPU())
 229     defer close(permissions)
 230 
 231     for i := range paths {
 232         // wait until some concurrency-room is available
 233         permissions <- struct{}{}
 234 
 235         go func(i int) {
 236             defer func() { <-permissions }()
 237 
 238             chsum, err := sha(paths[i])
 239             if err != nil {
 240                 chsum = ``
 241                 showError(paths[i], err)
 242             }
 243 
 244             sums[i] = chsum
 245         }(i)
 246     }
 247 }
 248 
 249 // sha calculates a checksum for a file's contents
 250 func sha(path string) (string, error) {
 251     f, err := os.Open(path)
 252     if err != nil {
 253         return ``, err
 254     }
 255     defer f.Close()
 256 
 257     sha := sha512.New()
 258     _, err = io.Copy(sha, f)
 259     if err != nil {
 260         return ``, err
 261     }
 262 
 263     return hex.EncodeToString(sha.Sum(nil)), err
 264 }