File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
// info is the help message shown by the help options; the text starts with
// a line-feed, so that its first visible line can sit on its own line in the
// source: main skips that leading byte when showing the message
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing those
up. Each regex erases all its occurrences on the current line in the order
given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -h      show this help message
    -help   show this help message

    -i      match regexes case-insensitively
    -ins    match regexes case-insensitively
`
  61 
  62 func main() {
  63     insensitive := false
  64     args := os.Args[1:]
  65 
  66     if len(args) > 0 {
  67         switch args[0] {
  68         case `-h`, `--h`, `-help`, `--help`:
  69             os.Stdout.WriteString(info[1:])
  70             return
  71 
  72         case `-i`, `--i`, `-ins`, `--ins`:
  73             insensitive = true
  74             args = args[1:]
  75 
  76         case `--`:
  77             args = args[1:]
  78         }
  79     }
  80 
  81     // if len(args) == 0 {
  82     //  args = []string{`[^\r]`}
  83     // }
  84 
  85     exprs := make([]*regexp.Regexp, 0, len(args))
  86 
  87     for _, s := range args {
  88         var err error
  89         var exp *regexp.Regexp
  90 
  91         if insensitive {
  92             exp, err = regexp.Compile(`(?i)` + s)
  93         } else {
  94             exp, err = regexp.Compile(s)
  95         }
  96 
  97         if err != nil {
  98             os.Stderr.WriteString(err.Error())
  99             os.Stderr.WriteString("\n")
 100             continue
 101         }
 102 
 103         exprs = append(exprs, exp)
 104     }
 105 
 106     // quit right away when given invalid regexes
 107     if len(exprs) < len(args) {
 108         os.Exit(1)
 109     }
 110 
 111     liveLines := true
 112     if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 113         liveLines = false
 114     }
 115 
 116     if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
 117         os.Stderr.WriteString(err.Error())
 118         os.Stderr.WriteString("\n")
 119         os.Exit(1)
 120     }
 121 }
 122 
 123 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 124     sc := bufio.NewScanner(r)
 125     sc.Buffer(nil, 8*1024*1024*1024)
 126     bw := bufio.NewWriter(w)
 127     defer bw.Flush()
 128 
 129     var srcbuf []byte
 130     var dstbuf []byte
 131     src := srcbuf[:0]
 132     dst := dstbuf[:0]
 133 
 134     for sc.Scan() {
 135         src = append(src[:0], sc.Bytes()...)
 136         for _, e := range exprs {
 137             dst = erase(dst[:0], src, e)
 138             src = append(src[:0], dst...)
 139         }
 140 
 141         bw.Write(dst)
 142 
 143         if err := bw.WriteByte('\n'); err != nil {
 144             return nil
 145         }
 146 
 147         if !live {
 148             continue
 149         }
 150 
 151         if err := bw.Flush(); err != nil {
 152             return nil
 153         }
 154     }
 155 
 156     return sc.Err()
 157 }
 158 
 159 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
 160     for len(src) > 0 {
 161         i, j := indexEscapeSequence(src)
 162         if i < 0 {
 163             dst = handleLineChunk(dst, src, with)
 164             break
 165         }
 166         if j < 0 {
 167             j = len(src)
 168         }
 169 
 170         dst = handleLineChunk(dst, src[:i], with)
 171         dst = append(dst, src[i:j]...)
 172         src = src[j:]
 173     }
 174 
 175     return dst
 176 }
 177 
 178 func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
 179     for len(src) > 0 {
 180         span := with.FindIndex(src)
 181         if span == nil {
 182             return append(dst, src...)
 183         }
 184 
 185         start := span[0]
 186         end := span[1]
 187         dst = append(dst, src[:start]...)
 188         src = src[end:]
 189     }
 190 
 191     return dst
 192 }
 193 
 194 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 195 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 196 // indices which can be independently negative when either the start/end of
 197 // a sequence isn't found; given their fairly-common use, even the hyperlink
 198 // ESC]8 sequences are supported
 199 func indexEscapeSequence(s []byte) (int, int) {
 200     var prev byte
 201 
 202     for i, b := range s {
 203         if prev == '\x1b' && b == '[' {
 204             j := indexLetter(s[i+1:])
 205             if j < 0 {
 206                 return i, -1
 207             }
 208             return i - 1, i + 1 + j + 1
 209         }
 210 
 211         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 212             j := indexPair(s[i+1:], '\x1b', '\\')
 213             if j < 0 {
 214                 return i, -1
 215             }
 216             return i - 1, i + 1 + j + 2
 217         }
 218 
 219         prev = b
 220     }
 221 
 222     return -1, -1
 223 }
 224 
 225 func indexLetter(s []byte) int {
 226     for i, b := range s {
 227         upper := b &^ 32
 228         if 'A' <= upper && upper <= 'Z' {
 229             return i
 230         }
 231     }
 232 
 233     return -1
 234 }
 235 
 236 func indexPair(s []byte, x byte, y byte) int {
 237     var prev byte
 238 
 239     for i, b := range s {
 240         if prev == x && b == y {
 241             return i
 242         }
 243         prev = b
 244     }
 245 
 246     return -1
 247 }