File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40 )
  41 
// info is the help message shown by the help options; it starts with a
// line-feed, which main skips by printing info[1:]
//
// NOTE(review): main also accepts the -b/-buffered option, which isn't
// listed among the options below
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

All ANSI-style sequences are removed before trying to match-remove things, to
avoid messing those up. Each regex erases all its occurrences on the current
line in the order given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -h, -help    show this help message
    -i, -ins     match regexes case-insensitively
`
  59 
  60 func main() {
  61     args := os.Args[1:]
  62     buffered := false
  63     insensitive := false
  64 
  65     for len(args) > 0 {
  66         switch args[0] {
  67         case `-b`, `--b`, `-buffered`, `--buffered`:
  68             buffered = true
  69             args = args[1:]
  70 
  71         case `-h`, `--h`, `-help`, `--help`:
  72             os.Stdout.WriteString(info[1:])
  73             return
  74 
  75         case `-i`, `--i`, `-ins`, `--ins`:
  76             insensitive = true
  77             args = args[1:]
  78         }
  79 
  80         break
  81     }
  82 
  83     if len(args) > 0 && args[0] == `--` {
  84         args = args[1:]
  85     }
  86 
  87     exprs := make([]*regexp.Regexp, 0, len(args))
  88 
  89     for _, s := range args {
  90         var err error
  91         var exp *regexp.Regexp
  92 
  93         if insensitive {
  94             exp, err = regexp.Compile(`(?i)` + s)
  95         } else {
  96             exp, err = regexp.Compile(s)
  97         }
  98 
  99         if err != nil {
 100             os.Stderr.WriteString(err.Error())
 101             os.Stderr.WriteString("\n")
 102             continue
 103         }
 104 
 105         exprs = append(exprs, exp)
 106     }
 107 
 108     // quit right away when given invalid regexes
 109     if len(exprs) < len(args) {
 110         os.Exit(1)
 111     }
 112 
 113     liveLines := !buffered
 114     if !buffered {
 115         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 116             liveLines = false
 117         }
 118     }
 119 
 120     err := run(os.Stdout, os.Stdin, exprs, liveLines)
 121     if err != nil && err != io.EOF {
 122         os.Stderr.WriteString(err.Error())
 123         os.Stderr.WriteString("\n")
 124         os.Exit(1)
 125     }
 126 }
 127 
 128 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 129     var buf []byte
 130     sc := bufio.NewScanner(r)
 131     sc.Buffer(nil, 8*1024*1024*1024)
 132     bw := bufio.NewWriter(w)
 133     defer bw.Flush()
 134 
 135     src := make([]byte, 8*1024)
 136     dst := make([]byte, 8*1024)
 137 
 138     for i := 0; sc.Scan(); i++ {
 139         line := sc.Bytes()
 140         if i == 0 && bytes.HasPrefix(line, []byte{0xef, 0xbb, 0xbf}) {
 141             line = line[3:]
 142         }
 143 
 144         s := line
 145         if bytes.IndexByte(s, '\x1b') >= 0 {
 146             buf = plain(buf[:0], s)
 147             s = buf
 148         }
 149 
 150         if len(exprs) > 0 {
 151             src = append(src[:0], s...)
 152             for _, exp := range exprs {
 153                 dst = erase(dst[:0], src, exp)
 154                 src = append(src[:0], dst...)
 155             }
 156             bw.Write(dst)
 157         } else {
 158             bw.Write(s)
 159         }
 160 
 161         if bw.WriteByte('\n') != nil {
 162             return io.EOF
 163         }
 164 
 165         if !live {
 166             continue
 167         }
 168 
 169         if bw.Flush() != nil {
 170             return io.EOF
 171         }
 172     }
 173 
 174     return sc.Err()
 175 }
 176 
 177 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
 178     for len(src) > 0 {
 179         span := with.FindIndex(src)
 180         // also ignore empty regex matches to avoid infinite outer loops,
 181         // as skipping empty slices isn't advancing at all, leaving the
 182         // string stuck to being empty-matched forever by the same regex
 183         if len(span) != 2 || span[0] == span[1] || span[0] < 0 {
 184             return append(dst, src...)
 185         }
 186 
 187         start, end := span[0], span[1]
 188         dst = append(dst, src[:start]...)
 189         // avoid infinite loops caused by empty regex matches
 190         if start == end && end < len(src) {
 191             dst = append(dst, src[end])
 192             end++
 193         }
 194         src = src[end:]
 195     }
 196 
 197     return dst
 198 }
 199 
 200 func plain(dst []byte, src []byte) []byte {
 201     for len(src) > 0 {
 202         i, j := indexEscapeSequence(src)
 203         if i < 0 {
 204             dst = append(dst, src...)
 205             break
 206         }
 207         if j < 0 {
 208             j = len(src)
 209         }
 210 
 211         if i > 0 {
 212             dst = append(dst, src[:i]...)
 213         }
 214 
 215         src = src[j:]
 216     }
 217 
 218     return dst
 219 }
 220 
 221 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 222 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 223 // indices which can be independently negative when either the start/end of
 224 // a sequence isn't found; given their fairly-common use, even the hyperlink
 225 // ESC]8 sequences are supported
 226 func indexEscapeSequence(s []byte) (int, int) {
 227     var prev byte
 228 
 229     for i, b := range s {
 230         if prev == '\x1b' && b == '[' {
 231             j := indexLetter(s[i+1:])
 232             if j < 0 {
 233                 return i, -1
 234             }
 235             return i - 1, i + 1 + j + 1
 236         }
 237 
 238         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 239             j := indexPair(s[i+1:], '\x1b', '\\')
 240             if j < 0 {
 241                 return i, -1
 242             }
 243             return i - 1, i + 1 + j + 2
 244         }
 245 
 246         prev = b
 247     }
 248 
 249     return -1, -1
 250 }
 251 
 252 func indexLetter(s []byte) int {
 253     for i, b := range s {
 254         upper := b &^ 32
 255         if 'A' <= upper && upper <= 'Z' {
 256             return i
 257         }
 258     }
 259 
 260     return -1
 261 }
 262 
 263 func indexPair(s []byte, x byte, y byte) int {
 264     var prev byte
 265 
 266     for i, b := range s {
 267         if prev == x && b == y && i > 0 {
 268             return i
 269         }
 270         prev = b
 271     }
 272 
 273     return -1
 274 }