File: unique/info.txt 1 unique [options...] [filepaths...] 2 3 4 Emit each line only once, ignoring later repetitions, if they happen. When 5 not given any filenames, the default is to read lines from standard input. 6 7 Options can start either with a single dash or 2 dashes: 8 9 -h show this help message 10 -help same as option `-h` 11 12 -i match/compare lines case-insensitively 13 -insensitive same as option `-i` File: unique/main.go 1 package main 2 3 import ( 4 "bufio" 5 "io" 6 "os" 7 "strings" 8 9 _ "embed" 10 ) 11 12 //go:embed info.txt 13 var info string 14 15 func main() { 16 if len(os.Args) > 1 { 17 switch os.Args[1] { 18 case `-h`, `--h`, `-help`, `--help`: 19 os.Stderr.WriteString(info) 20 return 21 } 22 } 23 24 args := os.Args[1:] 25 recase := identity 26 27 for len(args) > 0 { 28 switch args[0] { 29 case `-i`, `--i`, `-insensitive`, `--insensitive`: 30 recase = strings.ToLower 31 args = args[1:] 32 continue 33 } 34 35 break 36 } 37 38 err := run(args, recase) 39 if _, ok := err.(noMoreOutput); ok { 40 return 41 } 42 43 if err != nil { 44 os.Stderr.WriteString("\x1b[31m") 45 os.Stderr.WriteString(err.Error()) 46 os.Stderr.WriteString("\x1b[0m\n") 47 os.Exit(1) 48 } 49 } 50 51 func identity(s string) string { 52 return s 53 } 54 55 type noMoreOutput struct{} 56 57 func (nmo noMoreOutput) Error() string { 58 return `no more output` 59 } 60 61 type tooManyDashes struct{} 62 63 func (tmd tooManyDashes) Error() string { 64 return `can't use standard input (dash) more than once` 65 } 66 67 func run(paths []string, recase func(string) string) error { 68 // forbid multiple uses of stdin 69 dash := false 70 for _, p := range paths { 71 if p == `-` { 72 if dash { 73 return tooManyDashes{} 74 } 75 dash = true 76 } 77 } 78 79 got := make(map[string]struct{}) 80 args := uniqueArgs{got, recase} 81 w := bufio.NewWriter(os.Stdout) 82 defer w.Flush() 83 84 for _, p := range paths { 85 err := withFile(p, func(r io.Reader) error { 86 return unique(w, r, args) 87 }) 88 89 if err != nil { 90 return err 91 } 92 } 93 94 if len(paths) == 0 { 95 unique(w, os.Stdin, args) 96 } 97 return nil 98 } 99 100 func withFile(path string, fn func(r io.Reader) error) error { 101 if path == `-` { 102 return fn(os.Stdin) 103 } 104 105 f, err := os.Open(path) 106 if err != nil { 107 return err 108 } 109 defer f.Close() 110 111 return fn(f) 112 } 113 114 type uniqueArgs struct { 115 got map[string]struct{} 116 recase func(string) string 117 } 118 119 func unique(w *bufio.Writer, r io.Reader, args uniqueArgs) error { 120 const gb = 1024 * 1024 * 1024 121 sc := bufio.NewScanner(r) 122 sc.Buffer(nil, 8*gb) 123 124 for i := 0; sc.Scan(); i++ { 125 line := sc.Text() 126 if i == 0 { 127 line = strings.TrimPrefix(line, "\xef\xbb\xbf") 128 } 129 130 s := args.recase(line) 131 if _, ok := args.got[s]; ok { 132 continue 133 } 134 135 w.WriteString(line) 136 if err := w.WriteByte('\n'); err != nil { 137 return noMoreOutput{} 138 } 139 140 args.got[s] = struct{}{} 141 } 142 143 return sc.Err() 144 }