File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40 )
  41 
  42 const info = `
  43 erase [options...] [regexes...]
  44 
  45 
  46 Ignore/remove all occurrences of all regex matches along lines read from the
  47 standard input. The regular-expression mode used is "re2", which is a superset
  48 of the commonly-used "extended-mode".
  49 
  50 All ANSI-style sequences are removed before trying to match-remove things, to
  51 avoid messing those up. Each regex erases all its occurrences on the current
  52 line in the order given among the arguments, so regex-order matters.
  53 
  54 The options are, available both in single and double-dash versions
  55 
  56     -h, -help    show this help message
  57     -i, -ins     match regexes case-insensitively
  58 `
  59 
  60 func main() {
  61     args := os.Args[1:]
  62     buffered := false
  63     insensitive := false
  64 
  65     for len(args) > 0 {
  66         switch args[0] {
  67         case `-b`, `--b`, `-buffered`, `--buffered`:
  68             buffered = true
  69             args = args[1:]
  70             continue
  71 
  72         case `-h`, `--h`, `-help`, `--help`:
  73             os.Stdout.WriteString(info[1:])
  74             return
  75 
  76         case `-i`, `--i`, `-ins`, `--ins`:
  77             insensitive = true
  78             args = args[1:]
  79             continue
  80         }
  81 
  82         break
  83     }
  84 
  85     if len(args) > 0 && args[0] == `--` {
  86         args = args[1:]
  87     }
  88 
  89     exprs := make([]*regexp.Regexp, 0, len(args))
  90 
  91     for _, s := range args {
  92         var err error
  93         var exp *regexp.Regexp
  94 
  95         if insensitive {
  96             exp, err = regexp.Compile(`(?i)` + s)
  97         } else {
  98             exp, err = regexp.Compile(s)
  99         }
 100 
 101         if err != nil {
 102             os.Stderr.WriteString(err.Error())
 103             os.Stderr.WriteString("\n")
 104             continue
 105         }
 106 
 107         exprs = append(exprs, exp)
 108     }
 109 
 110     // quit right away when given invalid regexes
 111     if len(exprs) < len(args) {
 112         os.Exit(1)
 113     }
 114 
 115     liveLines := !buffered
 116     if !buffered {
 117         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 118             liveLines = false
 119         }
 120     }
 121 
 122     err := run(os.Stdout, os.Stdin, exprs, liveLines)
 123     if err != nil && err != io.EOF {
 124         os.Stderr.WriteString(err.Error())
 125         os.Stderr.WriteString("\n")
 126         os.Exit(1)
 127     }
 128 }
 129 
 130 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 131     var buf []byte
 132     sc := bufio.NewScanner(r)
 133     sc.Buffer(nil, 8*1024*1024*1024)
 134     bw := bufio.NewWriter(w)
 135     defer bw.Flush()
 136 
 137     src := make([]byte, 8*1024)
 138     dst := make([]byte, 8*1024)
 139 
 140     for i := 0; sc.Scan(); i++ {
 141         line := sc.Bytes()
 142         if i == 0 && bytes.HasPrefix(line, []byte{0xef, 0xbb, 0xbf}) {
 143             line = line[3:]
 144         }
 145 
 146         s := line
 147         if bytes.IndexByte(s, '\x1b') >= 0 {
 148             buf = plain(buf[:0], s)
 149             s = buf
 150         }
 151 
 152         if len(exprs) > 0 {
 153             src = append(src[:0], s...)
 154             for _, exp := range exprs {
 155                 dst = erase(dst[:0], src, exp)
 156                 src = append(src[:0], dst...)
 157             }
 158             bw.Write(dst)
 159         } else {
 160             bw.Write(s)
 161         }
 162 
 163         if bw.WriteByte('\n') != nil {
 164             return io.EOF
 165         }
 166 
 167         if !live {
 168             continue
 169         }
 170 
 171         if bw.Flush() != nil {
 172             return io.EOF
 173         }
 174     }
 175 
 176     return sc.Err()
 177 }
 178 
 179 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
 180     for len(src) > 0 {
 181         span := with.FindIndex(src)
 182         // also ignore empty regex matches to avoid infinite outer loops,
 183         // as skipping empty slices isn't advancing at all, leaving the
 184         // string stuck to being empty-matched forever by the same regex
 185         if len(span) != 2 || span[0] == span[1] || span[0] < 0 {
 186             return append(dst, src...)
 187         }
 188 
 189         start, end := span[0], span[1]
 190         dst = append(dst, src[:start]...)
 191         // avoid infinite loops caused by empty regex matches
 192         if start == end && end < len(src) {
 193             dst = append(dst, src[end])
 194             end++
 195         }
 196         src = src[end:]
 197     }
 198 
 199     return dst
 200 }
 201 
 202 func plain(dst []byte, src []byte) []byte {
 203     for len(src) > 0 {
 204         i, j := indexEscapeSequence(src)
 205         if i < 0 {
 206             dst = append(dst, src...)
 207             break
 208         }
 209         if j < 0 {
 210             j = len(src)
 211         }
 212 
 213         if i > 0 {
 214             dst = append(dst, src[:i]...)
 215         }
 216 
 217         src = src[j:]
 218     }
 219 
 220     return dst
 221 }
 222 
 223 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 224 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 225 // indices which can be independently negative when either the start/end of
 226 // a sequence isn't found; given their fairly-common use, even the hyperlink
 227 // ESC]8 sequences are supported
 228 func indexEscapeSequence(s []byte) (int, int) {
 229     var prev byte
 230 
 231     for i, b := range s {
 232         if prev == '\x1b' && b == '[' {
 233             j := indexLetter(s[i+1:])
 234             if j < 0 {
 235                 return i, -1
 236             }
 237             return i - 1, i + 1 + j + 1
 238         }
 239 
 240         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 241             j := indexPair(s[i+1:], '\x1b', '\\')
 242             if j < 0 {
 243                 return i, -1
 244             }
 245             return i - 1, i + 1 + j + 2
 246         }
 247 
 248         prev = b
 249     }
 250 
 251     return -1, -1
 252 }
 253 
 254 func indexLetter(s []byte) int {
 255     for i, b := range s {
 256         upper := b &^ 32
 257         if 'A' <= upper && upper <= 'Z' {
 258             return i
 259         }
 260     }
 261 
 262     return -1
 263 }
 264 
 265 func indexPair(s []byte, x byte, y byte) int {
 266     var prev byte
 267 
 268     for i, b := range s {
 269         if prev == x && b == y && i > 0 {
 270             return i
 271         }
 272         prev = b
 273     }
 274 
 275     return -1
 276 }