File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 erase [options...] [regexes...]
  45 
  46 
  47 Ignore/remove all occurrences of all regex matches along lines read from the
  48 standard input. The regular-expression mode used is "re2", which is a superset
  49 of the commonly-used "extended-mode".
  50 
  51 Regexes always avoid matching any ANSI-style sequences, to avoid messing those
  52 up. Each regex erases all its occurrences on the current line in the order
  53 given among the arguments, so regex-order matters.
  54 
  55 The options are, available both in single and double-dash versions
  56 
  57     -h      show this help message
  58     -help   show this help message
  59 
  60     -i      match regexes case-insensitively
  61     -ins    match regexes case-insensitively
  62 `
  63 
  64 func main() {
  65     args := os.Args[1:]
  66     buffered := false
  67     insensitive := false
  68 
  69     for len(args) > 0 {
  70         switch args[0] {
  71         case `-b`, `--b`, `-buffered`, `--buffered`:
  72             buffered = true
  73             args = args[1:]
  74 
  75         case `-h`, `--h`, `-help`, `--help`:
  76             os.Stdout.WriteString(info[1:])
  77             return
  78 
  79         case `-i`, `--i`, `-ins`, `--ins`:
  80             insensitive = true
  81             args = args[1:]
  82         }
  83 
  84         break
  85     }
  86 
  87     if len(args) > 0 && args[0] == `--` {
  88         args = args[1:]
  89     }
  90 
  91     patterns := make([]pattern, 0, len(args))
  92 
  93     for _, s := range args {
  94         var err error
  95         var pat pattern
  96 
  97         if insensitive {
  98             pat, err = compile(`(?i)` + s)
  99         } else {
 100             pat, err = compile(s)
 101         }
 102 
 103         if err != nil {
 104             os.Stderr.WriteString(err.Error())
 105             os.Stderr.WriteString("\n")
 106             continue
 107         }
 108 
 109         patterns = append(patterns, pat)
 110     }
 111 
 112     // quit right away when given invalid regexes
 113     if len(patterns) < len(args) {
 114         os.Exit(1)
 115     }
 116 
 117     liveLines := !buffered
 118     if !buffered {
 119         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 120             liveLines = false
 121         }
 122     }
 123 
 124     if err := run(os.Stdout, os.Stdin, patterns, liveLines); err != nil && err != io.EOF {
 125         os.Stderr.WriteString(err.Error())
 126         os.Stderr.WriteString("\n")
 127         os.Exit(1)
 128     }
 129 }
 130 
 131 // pattern is a regular-expression pattern which distinguishes between the
 132 // start/end of a line and those of the chunks it can be used to match
 133 type pattern struct {
 134     // expr is the regular-expression
 135     expr *regexp.Regexp
 136 
 137     // begin is whether the regexp refers to the start of a line
 138     begin bool
 139 
 140     // end is whether the regexp refers to the end of a line
 141     end bool
 142 }
 143 
 144 func compile(src string) (pattern, error) {
 145     expr, err := regexp.Compile(src)
 146 
 147     var pat pattern
 148     pat.expr = expr
 149     pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
 150     pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
 151     return pat, err
 152 }
 153 
 154 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
 155     if i > 0 && p.begin {
 156         return -1, -1
 157     }
 158     if i != last && p.end {
 159         return -1, -1
 160     }
 161 
 162     span := p.expr.FindIndex(s)
 163     // also ignore empty regex matches to avoid infinite outer loops,
 164     // as skipping empty slices isn't advancing at all, leaving the
 165     // string stuck to being empty-matched forever by the same regex
 166     if len(span) != 2 || span[0] == span[1] {
 167         return -1, -1
 168     }
 169 
 170     return span[0], span[1]
 171 }
 172 
 173 func run(w io.Writer, r io.Reader, patterns []pattern, live bool) error {
 174     sc := bufio.NewScanner(r)
 175     sc.Buffer(nil, 8*1024*1024*1024)
 176     bw := bufio.NewWriter(w)
 177     defer bw.Flush()
 178 
 179     src := make([]byte, 8*1024)
 180     dst := make([]byte, 8*1024)
 181 
 182     for i := 0; sc.Scan(); i++ {
 183         s := sc.Bytes()
 184 
 185         if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
 186             s = s[3:]
 187         }
 188 
 189         if len(patterns) > 0 {
 190             src = append(src[:0], s...)
 191             for _, p := range patterns {
 192                 dst = erase(dst[:0], src, p)
 193                 src = append(src[:0], dst...)
 194             }
 195             bw.Write(dst)
 196         } else {
 197             bw.Write(s)
 198         }
 199 
 200         if bw.WriteByte('\n') != nil {
 201             return io.EOF
 202         }
 203 
 204         if !live {
 205             continue
 206         }
 207 
 208         if bw.Flush() != nil {
 209             return io.EOF
 210         }
 211     }
 212 
 213     return sc.Err()
 214 }
 215 
 216 func erase(dst []byte, src []byte, with pattern) []byte {
 217     n := 0
 218     last := countChunks(src) - 1
 219     if last < 0 {
 220         last = 0
 221     }
 222 
 223     for len(src) > 0 {
 224         i, j := indexEscapeSequence(src)
 225         if i < 0 {
 226             dst = handleChunk(dst, src, with, n, last)
 227             break
 228         }
 229         if j < 0 {
 230             j = len(src)
 231         }
 232 
 233         dst = handleChunk(dst, src[:i], with, n, last)
 234         dst = append(dst, src[i:j]...)
 235         if i > 0 {
 236             n++
 237         }
 238         src = src[j:]
 239     }
 240 
 241     return dst
 242 }
 243 
 244 func countChunks(s []byte) int {
 245     chunks := 0
 246 
 247     for len(s) > 0 {
 248         i, j := indexEscapeSequence(s)
 249         if i < 0 {
 250             break
 251         }
 252 
 253         if i > 0 {
 254             chunks++
 255         }
 256 
 257         if j < 0 {
 258             break
 259         }
 260         s = s[j:]
 261     }
 262 
 263     if len(s) > 0 {
 264         chunks++
 265     }
 266     return chunks
 267 }
 268 
 269 func handleChunk(dst []byte, src []byte, with pattern, n int, last int) []byte {
 270     for len(src) > 0 {
 271         start, end := with.findIndex(src, n, last)
 272         if start < 0 {
 273             return append(dst, src...)
 274         }
 275 
 276         dst = append(dst, src[:start]...)
 277         // avoid infinite loops caused by empty regex matches
 278         if start == end && end < len(src) {
 279             dst = append(dst, src[end])
 280             end++
 281         }
 282         src = src[end:]
 283     }
 284 
 285     return dst
 286 }
 287 
 288 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 289 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 290 // indices which can be independently negative when either the start/end of
 291 // a sequence isn't found; given their fairly-common use, even the hyperlink
 292 // ESC]8 sequences are supported
 293 func indexEscapeSequence(s []byte) (int, int) {
 294     var prev byte
 295 
 296     for i, b := range s {
 297         if prev == '\x1b' && b == '[' {
 298             j := indexLetter(s[i+1:])
 299             if j < 0 {
 300                 return i, -1
 301             }
 302             return i - 1, i + 1 + j + 1
 303         }
 304 
 305         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 306             j := indexPair(s[i+1:], '\x1b', '\\')
 307             if j < 0 {
 308                 return i, -1
 309             }
 310             return i - 1, i + 1 + j + 2
 311         }
 312 
 313         prev = b
 314     }
 315 
 316     return -1, -1
 317 }
 318 
 319 func indexLetter(s []byte) int {
 320     for i, b := range s {
 321         upper := b &^ 32
 322         if 'A' <= upper && upper <= 'Z' {
 323             return i
 324         }
 325     }
 326 
 327     return -1
 328 }
 329 
 330 func indexPair(s []byte, x byte, y byte) int {
 331     var prev byte
 332 
 333     for i, b := range s {
 334         if prev == x && b == y {
 335             return i
 336         }
 337         prev = b
 338     }
 339 
 340     return -1
 341 }