File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
  41 const info = `
  42 erase [options...] [regexes...]
  43 
  44 
  45 Ignore/remove all occurrences of all regex matches along lines read from the
  46 standard input. The regular-expression mode used is "re2", which is a superset
  47 of the commonly-used "extended-mode".
  48 
  49 Regexes always avoid matching any ANSI-style sequences, to avoid messing those
  50 up. Each regex erases all its occurrences on the current line in the order
  51 given among the arguments, so regex-order matters.
  52 
  53 The options are, available both in single and double-dash versions
  54 
  55     -h      show this help message
  56     -help   show this help message
  57 
  58     -i      match regexes case-insensitively
  59     -ins    match regexes case-insensitively
  60 `
  61 
  62 func main() {
  63     args := os.Args[1:]
  64     buffered := false
  65     insensitive := false
  66 
  67 out:
  68     for len(args) > 0 {
  69         switch args[0] {
  70         case `-b`, `--b`, `-buffered`, `--buffered`:
  71             buffered = true
  72             args = args[1:]
  73 
  74         case `-h`, `--h`, `-help`, `--help`:
  75             os.Stdout.WriteString(info[1:])
  76             return
  77 
  78         case `-i`, `--i`, `-ins`, `--ins`:
  79             insensitive = true
  80             args = args[1:]
  81 
  82         default:
  83             break out
  84         }
  85     }
  86 
  87     if len(args) > 0 && args[0] == `--` {
  88         args = args[1:]
  89     }
  90 
  91     exprs := make([]*regexp.Regexp, 0, len(args))
  92 
  93     for _, s := range args {
  94         var err error
  95         var exp *regexp.Regexp
  96 
  97         if insensitive {
  98             exp, err = regexp.Compile(`(?i)` + s)
  99         } else {
 100             exp, err = regexp.Compile(s)
 101         }
 102 
 103         if err != nil {
 104             os.Stderr.WriteString(err.Error())
 105             os.Stderr.WriteString("\n")
 106             continue
 107         }
 108 
 109         exprs = append(exprs, exp)
 110     }
 111 
 112     // quit right away when given invalid regexes
 113     if len(exprs) < len(args) {
 114         os.Exit(1)
 115     }
 116 
 117     liveLines := !buffered
 118     if !buffered {
 119         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 120             liveLines = false
 121         }
 122     }
 123 
 124     if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
 125         os.Stderr.WriteString(err.Error())
 126         os.Stderr.WriteString("\n")
 127         os.Exit(1)
 128     }
 129 }
 130 
 131 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 132     sc := bufio.NewScanner(r)
 133     sc.Buffer(nil, 8*1024*1024*1024)
 134     bw := bufio.NewWriter(w)
 135     defer bw.Flush()
 136 
 137     var src []byte
 138     var dst []byte
 139 
 140     for i := 0; sc.Scan(); i++ {
 141         s := sc.Bytes()
 142         if i == 0 && len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf {
 143             s = s[3:]
 144         }
 145 
 146         src = append(src[:0], s...)
 147         for _, e := range exprs {
 148             dst = erase(dst[:0], src, e)
 149             src = append(src[:0], dst...)
 150         }
 151 
 152         bw.Write(dst)
 153 
 154         if err := bw.WriteByte('\n'); err != nil {
 155             return nil
 156         }
 157 
 158         if !live {
 159             continue
 160         }
 161 
 162         if err := bw.Flush(); err != nil {
 163             return nil
 164         }
 165     }
 166 
 167     return sc.Err()
 168 }
 169 
 170 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
 171     for len(src) > 0 {
 172         i, j := indexEscapeSequence(src)
 173         if i < 0 {
 174             dst = handleLineChunk(dst, src, with)
 175             break
 176         }
 177         if j < 0 {
 178             j = len(src)
 179         }
 180 
 181         dst = handleLineChunk(dst, src[:i], with)
 182         dst = append(dst, src[i:j]...)
 183         src = src[j:]
 184     }
 185 
 186     return dst
 187 }
 188 
 189 func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
 190     for len(src) > 0 {
 191         span := with.FindIndex(src)
 192         if span == nil {
 193             return append(dst, src...)
 194         }
 195 
 196         start := span[0]
 197         end := span[1]
 198 
 199         dst = append(dst, src[:start]...)
 200         // avoid infinite loops caused by empty regex matches
 201         if start == end && end < len(src) {
 202             dst = append(dst, src[end])
 203             end++
 204         }
 205         src = src[end:]
 206     }
 207 
 208     return dst
 209 }
 210 
 211 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 212 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 213 // indices which can be independently negative when either the start/end of
 214 // a sequence isn't found; given their fairly-common use, even the hyperlink
 215 // ESC]8 sequences are supported
 216 func indexEscapeSequence(s []byte) (int, int) {
 217     var prev byte
 218 
 219     for i, b := range s {
 220         if prev == '\x1b' && b == '[' {
 221             j := indexLetter(s[i+1:])
 222             if j < 0 {
 223                 return i, -1
 224             }
 225             return i - 1, i + 1 + j + 1
 226         }
 227 
 228         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 229             j := indexPair(s[i+1:], '\x1b', '\\')
 230             if j < 0 {
 231                 return i, -1
 232             }
 233             return i - 1, i + 1 + j + 2
 234         }
 235 
 236         prev = b
 237     }
 238 
 239     return -1, -1
 240 }
 241 
 242 func indexLetter(s []byte) int {
 243     for i, b := range s {
 244         upper := b &^ 32
 245         if 'A' <= upper && upper <= 'Z' {
 246             return i
 247         }
 248     }
 249 
 250     return -1
 251 }
 252 
 253 func indexPair(s []byte, x byte, y byte) int {
 254     var prev byte
 255 
 256     for i, b := range s {
 257         if prev == x && b == y {
 258             return i
 259         }
 260         prev = b
 261     }
 262 
 263     return -1
 264 }