File: unique/info.txt
   1 unique [options...] [filepaths...]
   2 
   3 
   4 Emit each line only once, ignoring later repetitions, if they happen. When
   5 not given any filenames, the default is to read lines from standard input.
   6 
   7 Options can start either with a single dash or 2 dashes:
   8 
   9     -h              show this help message
  10     -help           same as option `-h`
  11 
  12     -i              match/compare lines case-insensitively
  13     -insensitive    same as option `-i`

     File: unique/main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "io"
   6     "os"
   7     "strings"
   8 
   9     _ "embed"
  10 )
  11 
  12 //go:embed info.txt
  13 var info string
  14 
  15 func main() {
  16     if len(os.Args) > 1 {
  17         switch os.Args[1] {
  18         case `-h`, `--h`, `-help`, `--help`:
  19             os.Stderr.WriteString(info)
  20             return
  21         }
  22     }
  23 
  24     args := os.Args[1:]
  25     recase := identity
  26 
  27     for len(args) > 0 {
  28         switch args[0] {
  29         case `-i`, `--i`, `-insensitive`, `--insensitive`:
  30             recase = strings.ToLower
  31             args = args[1:]
  32             continue
  33         }
  34 
  35         break
  36     }
  37 
  38     err := run(args, recase)
  39     if _, ok := err.(noMoreOutput); ok {
  40         return
  41     }
  42 
  43     if err != nil {
  44         os.Stderr.WriteString("\x1b[31m")
  45         os.Stderr.WriteString(err.Error())
  46         os.Stderr.WriteString("\x1b[0m\n")
  47         os.Exit(1)
  48     }
  49 }
  50 
  51 func identity(s string) string {
  52     return s
  53 }
  54 
  55 type noMoreOutput struct{}
  56 
  57 func (nmo noMoreOutput) Error() string {
  58     return `no more output`
  59 }
  60 
  61 type tooManyDashes struct{}
  62 
  63 func (tmd tooManyDashes) Error() string {
  64     return `can't use standard input (dash) more than once`
  65 }
  66 
  67 func run(paths []string, recase func(string) string) error {
  68     // forbid multiple uses of stdin
  69     dash := false
  70     for _, p := range paths {
  71         if p == `-` {
  72             if dash {
  73                 return tooManyDashes{}
  74             }
  75             dash = true
  76         }
  77     }
  78 
  79     got := make(map[string]struct{})
  80     args := uniqueArgs{got, recase}
  81     w := bufio.NewWriter(os.Stdout)
  82     defer w.Flush()
  83 
  84     for _, p := range paths {
  85         err := withFile(p, func(r io.Reader) error {
  86             return unique(w, r, args)
  87         })
  88 
  89         if err != nil {
  90             return err
  91         }
  92     }
  93 
  94     if len(paths) == 0 {
  95         unique(w, os.Stdin, args)
  96     }
  97     return nil
  98 }
  99 
 100 func withFile(path string, fn func(r io.Reader) error) error {
 101     if path == `-` {
 102         return fn(os.Stdin)
 103     }
 104 
 105     f, err := os.Open(path)
 106     if err != nil {
 107         return err
 108     }
 109     defer f.Close()
 110 
 111     return fn(f)
 112 }
 113 
 114 type uniqueArgs struct {
 115     got    map[string]struct{}
 116     recase func(string) string
 117 }
 118 
 119 func unique(w *bufio.Writer, r io.Reader, args uniqueArgs) error {
 120     const gb = 1024 * 1024 * 1024
 121     sc := bufio.NewScanner(r)
 122     sc.Buffer(nil, 8*gb)
 123 
 124     for i := 0; sc.Scan(); i++ {
 125         line := sc.Text()
 126         if i == 0 {
 127             line = strings.TrimPrefix(line, "\xef\xbb\xbf")
 128         }
 129 
 130         s := args.recase(line)
 131         if _, ok := args.got[s]; ok {
 132             continue
 133         }
 134 
 135         w.WriteString(line)
 136         if err := w.WriteByte('\n'); err != nil {
 137             return noMoreOutput{}
 138         }
 139 
 140         args.got[s] = struct{}{}
 141     }
 142 
 143     return sc.Err()
 144 }