File: dedup.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath dedup.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "errors" 37 "io" 38 "os" 39 ) 40 41 // Note: the code is avoiding using the fmt package to save hundreds of 42 // kilobytes on the resulting executable, which is a noticeable difference. 43 44 const info = ` 45 dedup [options...] [file...] 46 47 48 DEDUPlicate lines prevents the same line from appearing again in the output, 49 after the first time. Unique lines are remembered across inputs. 50 51 Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line 52 feeds by default. 53 54 All (optional) leading options start with either single or double-dash: 55 56 -h show this help message 57 -help show this help message 58 ` 59 60 // errNoMoreOutput is a dummy error whose message is ignored, and which 61 // causes the app to quit immediately and successfully 62 var errNoMoreOutput = errors.New(`no more output`) 63 64 type stringSet map[string]struct{} 65 66 func main() { 67 buffered := false 68 args := os.Args[1:] 69 70 if len(args) > 0 { 71 switch args[0] { 72 case `-h`, `--h`, `-help`, `--help`: 73 os.Stderr.WriteString(info[1:]) 74 return 75 76 case `-buffered`, `--buffered`: 77 buffered = true 78 args = args[1:] 79 } 80 } 81 82 if len(args) > 0 && args[0] == `--` { 83 args = args[1:] 84 } 85 86 liveLines := true 87 if !buffered { 88 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 89 liveLines = false 90 } 91 } 92 93 err := run(os.Stdout, args, liveLines) 94 if err != nil && err != errNoMoreOutput { 95 os.Stderr.WriteString(err.Error()) 96 os.Stderr.WriteString("\n") 97 os.Exit(1) 98 } 99 } 100 101 func run(w io.Writer, args []string, live bool) error { 102 files := make(stringSet) 103 lines := make(stringSet) 104 bw := bufio.NewWriter(w) 105 defer bw.Flush() 106 107 for _, name := range args { 108 if _, ok := files[name]; ok { 109 continue 110 } 111 files[name] = struct{}{} 112 113 if err := handleFile(bw, name, lines, live); err != nil { 114 return err 115 } 116 } 117 118 if len(args) == 0 { 119 return dedup(bw, os.Stdin, lines, live) 120 } 121 return nil 122 } 123 124 func handleFile(w *bufio.Writer, name string, got stringSet, live bool) error { 125 if name == `` || name == `-` { 126 return dedup(w, os.Stdin, got, live) 127 } 128 129 f, err := os.Open(name) 130 if err != nil { 131 return errors.New(`can't read from file named "` + name + `"`) 132 } 133 defer f.Close() 134 135 return dedup(w, f, got, live) 136 } 137 138 func dedup(w *bufio.Writer, r io.Reader, got stringSet, live bool) error { 139 const gb = 1024 * 1024 * 1024 140 sc := bufio.NewScanner(r) 141 sc.Buffer(nil, 8*gb) 142 143 for sc.Scan() { 144 line := sc.Text() 145 if _, ok := got[line]; ok { 146 continue 147 } 148 got[line] = struct{}{} 149 150 w.Write(sc.Bytes()) 151 if w.WriteByte('\n') != nil { 152 return errNoMoreOutput 153 } 154 155 if !live { 156 continue 157 } 158 159 if err := w.Flush(); err != nil { 160 return errNoMoreOutput 161 } 162 } 163 164 return sc.Err() 165 }