File: dedup.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath dedup.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "errors" 37 "io" 38 "os" 39 ) 40 41 const info = ` 42 dedup [options...] [file...] 43 44 45 DEDUPlicate lines prevents the same line from appearing again in the output, 46 after the first time. Unique lines are remembered across inputs. 47 48 Input is assumed to be UTF-8, and all CRLF byte-pairs are turned into line 49 feeds by default. 50 51 All (optional) leading options start with either single or double-dash: 52 53 -h, -help show this help message 54 ` 55 56 type stringSet map[string]struct{} 57 58 func main() { 59 buffered := false 60 args := os.Args[1:] 61 62 if len(args) > 0 { 63 switch args[0] { 64 case `-b`, `--b`, `-buffered`, `--buffered`: 65 buffered = true 66 args = args[1:] 67 68 case `-h`, `--h`, `-help`, `--help`: 69 os.Stdout.WriteString(info[1:]) 70 return 71 } 72 } 73 74 if len(args) > 0 && args[0] == `--` { 75 args = args[1:] 76 } 77 78 liveLines := !buffered 79 if !buffered { 80 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 81 liveLines = false 82 } 83 } 84 85 err := run(os.Stdout, args, liveLines) 86 if err != nil && err != io.EOF { 87 os.Stderr.WriteString(err.Error()) 88 os.Stderr.WriteString("\n") 89 os.Exit(1) 90 return 91 } 92 } 93 94 func run(w io.Writer, args []string, live bool) error { 95 files := make(stringSet) 96 lines := make(stringSet) 97 bw := bufio.NewWriter(w) 98 defer bw.Flush() 99 100 for _, name := range args { 101 if _, ok := files[name]; ok { 102 continue 103 } 104 files[name] = struct{}{} 105 106 if err := handleFile(bw, name, lines, live); err != nil { 107 return err 108 } 109 } 110 111 if len(args) == 0 { 112 return dedup(bw, os.Stdin, lines, live) 113 } 114 return nil 115 } 116 117 func handleFile(w *bufio.Writer, name string, got stringSet, live bool) error { 118 if name == `` || name == `-` { 119 return dedup(w, os.Stdin, got, live) 120 } 121 122 f, err := os.Open(name) 123 if err != nil { 124 return errors.New(`can't read from file named "` + name + `"`) 125 } 126 defer f.Close() 127 128 return dedup(w, f, got, live) 129 } 130 131 func dedup(w *bufio.Writer, r io.Reader, got stringSet, live bool) error { 132 const gb = 1024 * 1024 * 1024 133 sc := bufio.NewScanner(r) 134 sc.Buffer(nil, 8*gb) 135 136 for sc.Scan() { 137 line := sc.Text() 138 if _, ok := got[line]; ok { 139 continue 140 } 141 got[line] = struct{}{} 142 143 w.Write(sc.Bytes()) 144 if w.WriteByte('\n') != nil { 145 return io.EOF 146 } 147 148 if !live { 149 continue 150 } 151 152 if err := w.Flush(); err != nil { 153 return io.EOF 154 } 155 } 156 157 return sc.Err() 158 }