/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath dedup.go
*/

// Command dedup emits each distinct input line only once, in order of first
// appearance, reading from the files named as arguments or from standard
// input.
package main

import (
	"bufio"
	"errors"
	"io"
	"os"
)

// info is the help message; main skips its leading line-feed when showing it.
const info = `
dedup [options...] [file...]


DEDUPlicate lines prevents the same line from appearing again in the output,
after the first time. Unique lines are remembered across inputs.

Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line
feeds by default.

All (optional) leading options start with either single or double-dash:

    -b, -buffered    buffer the output, instead of flushing it after each line
    -h, -help        show this help message
`

// stringSet is a set of strings; only its keys carry information.
type stringSet map[string]struct{}

// main handles the optional leading option, decides whether output should be
// flushed line by line, and runs the deduplication over the remaining
// arguments. Any error other than io.EOF is shown on stderr, and makes the
// app exit with code 1.
func main() {
	buffered := false
	args := os.Args[1:]

	// only the first argument is ever checked for options
	if len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	// a leading double-dash explicitly ends the options
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	liveLines := !buffered
	if !buffered {
		// a stdout which can seek is presumably a regular file, rather than
		// a pipe or a terminal, so flushing after each line gains nothing
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// io.EOF is how a failed write is reported: the app then quits quietly
	err := run(os.Stdout, args, liveLines)
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run deduplicates lines across all the inputs named in args, writing the
// result to w through a buffered writer, which is flushed on return. Names
// given more than once are only read the first time. Standard input is read
// when args is empty. The result is the first error met, if any.
func run(w io.Writer, args []string, live bool) error {
	files := make(stringSet)
	lines := make(stringSet)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	for _, name := range args {
		if _, ok := files[name]; ok {
			continue
		}
		files[name] = struct{}{}

		if err := handleFile(bw, name, lines, live); err != nil {
			return err
		}
	}

	if len(args) == 0 {
		return dedup(bw, os.Stdin, lines, live)
	}
	return nil
}

// handleFile deduplicates the lines of the file given by name, where an empty
// name or a single dash stand for standard input. Failing to open the file
// results in an error mentioning its name.
func handleFile(w *bufio.Writer, name string, got stringSet, live bool) error {
	if name == `` || name == `-` {
		return dedup(w, os.Stdin, got, live)
	}

	f, err := os.Open(name)
	if err != nil {
		return errors.New(`can't read from file named "` + name + `"`)
	}
	defer f.Close()

	return dedup(w, f, got, live)
}

// dedup copies lines from r to w, skipping all lines already in got, and
// remembering each newly-emitted line in it. Lines are split using the
// default behavior of bufio.Scanner, which also drops a trailing carriage
// return from each line; each emitted line is followed by a line-feed.
//
// When live is true, w is flushed after each emitted line. A failed write or
// flush results in io.EOF, which main takes as a request to quit quietly;
// otherwise the result is the scanner's error, if any.
func dedup(w *bufio.Writer, r io.Reader, got stringSet, live bool) error {
	const gb = 1024 * 1024 * 1024
	const maxInt = int(^uint(0) >> 1)

	// allow lines up to 8 GiB, or up to the biggest int when that's smaller:
	// using the constant directly can't even compile when ints are 32-bit
	limit := int64(8 * gb)
	if limit > int64(maxInt) {
		limit = int64(maxInt)
	}

	sc := bufio.NewScanner(r)
	sc.Buffer(nil, int(limit))

	for sc.Scan() {
		// looking up a converted byte-slice avoids allocating strings for
		// duplicate lines; only new lines are copied, when they become keys
		b := sc.Bytes()
		if _, ok := got[string(b)]; ok {
			continue
		}
		got[string(b)] = struct{}{}

		// buffered writers remember their first error, so checking only the
		// second write is enough
		w.Write(b)
		if w.WriteByte('\n') != nil {
			return io.EOF
		}

		if !live {
			continue
		}

		if err := w.Flush(); err != nil {
			return io.EOF
		}
	}

	return sc.Err()
}