File: erase.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath erase.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
// info is the help message shown when the first argument is one of the help
// options; it starts with an empty line, which main skips when showing it
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing those
up. Each regex erases all its occurrences on the current line in the order
given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -h      show this help message
    -help   show this help message

    -i      match regexes case-insensitively
    -ins    match regexes case-insensitively
`
  61 
  62 func main() {
  63     args := os.Args[1:]
  64     insensitive := false
  65 
  66     if len(args) > 0 {
  67         switch args[0] {
  68         case `-h`, `--h`, `-help`, `--help`:
  69             os.Stdout.WriteString(info[1:])
  70             return
  71 
  72         case `-i`, `--i`, `-ins`, `--ins`:
  73             insensitive = true
  74             args = args[1:]
  75 
  76         case `--`:
  77             args = args[1:]
  78         }
  79     }
  80 
  81     exprs := make([]*regexp.Regexp, 0, len(args))
  82 
  83     for _, s := range args {
  84         var err error
  85         var exp *regexp.Regexp
  86 
  87         if insensitive {
  88             exp, err = regexp.Compile(`(?i)` + s)
  89         } else {
  90             exp, err = regexp.Compile(s)
  91         }
  92 
  93         if err != nil {
  94             os.Stderr.WriteString(err.Error())
  95             os.Stderr.WriteString("\n")
  96             continue
  97         }
  98 
  99         exprs = append(exprs, exp)
 100     }
 101 
 102     // quit right away when given invalid regexes
 103     if len(exprs) < len(args) {
 104         os.Exit(1)
 105     }
 106 
 107     liveLines := true
 108     if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 109         liveLines = false
 110     }
 111 
 112     if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
 113         os.Stderr.WriteString(err.Error())
 114         os.Stderr.WriteString("\n")
 115         os.Exit(1)
 116     }
 117 }
 118 
 119 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 120     sc := bufio.NewScanner(r)
 121     sc.Buffer(nil, 8*1024*1024*1024)
 122     bw := bufio.NewWriter(w)
 123     defer bw.Flush()
 124 
 125     var srcbuf []byte
 126     var dstbuf []byte
 127     src := srcbuf[:0]
 128     dst := dstbuf[:0]
 129 
 130     for sc.Scan() {
 131         src = append(src[:0], sc.Bytes()...)
 132         for _, e := range exprs {
 133             dst = erase(dst[:0], src, e)
 134             src = append(src[:0], dst...)
 135         }
 136 
 137         bw.Write(dst)
 138 
 139         if err := bw.WriteByte('\n'); err != nil {
 140             return nil
 141         }
 142 
 143         if !live {
 144             continue
 145         }
 146 
 147         if err := bw.Flush(); err != nil {
 148             return nil
 149         }
 150     }
 151 
 152     return sc.Err()
 153 }
 154 
 155 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
 156     for len(src) > 0 {
 157         i, j := indexEscapeSequence(src)
 158         if i < 0 {
 159             dst = handleLineChunk(dst, src, with)
 160             break
 161         }
 162         if j < 0 {
 163             j = len(src)
 164         }
 165 
 166         dst = handleLineChunk(dst, src[:i], with)
 167         dst = append(dst, src[i:j]...)
 168         src = src[j:]
 169     }
 170 
 171     return dst
 172 }
 173 
 174 func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
 175     for len(src) > 0 {
 176         span := with.FindIndex(src)
 177         if span == nil {
 178             return append(dst, src...)
 179         }
 180 
 181         start := span[0]
 182         end := span[1]
 183 
 184         dst = append(dst, src[:start]...)
 185         // avoid infinite loops caused by empty regex matches
 186         if start == end && end < len(src) {
 187             dst = append(dst, src[end])
 188             end++
 189         }
 190         src = src[end:]
 191     }
 192 
 193     return dst
 194 }
 195 
 196 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 197 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 198 // indices which can be independently negative when either the start/end of
 199 // a sequence isn't found; given their fairly-common use, even the hyperlink
 200 // ESC]8 sequences are supported
 201 func indexEscapeSequence(s []byte) (int, int) {
 202     var prev byte
 203 
 204     for i, b := range s {
 205         if prev == '\x1b' && b == '[' {
 206             j := indexLetter(s[i+1:])
 207             if j < 0 {
 208                 return i, -1
 209             }
 210             return i - 1, i + 1 + j + 1
 211         }
 212 
 213         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 214             j := indexPair(s[i+1:], '\x1b', '\\')
 215             if j < 0 {
 216                 return i, -1
 217             }
 218             return i - 1, i + 1 + j + 2
 219         }
 220 
 221         prev = b
 222     }
 223 
 224     return -1, -1
 225 }
 226 
 227 func indexLetter(s []byte) int {
 228     for i, b := range s {
 229         upper := b &^ 32
 230         if 'A' <= upper && upper <= 'Z' {
 231             return i
 232         }
 233     }
 234 
 235     return -1
 236 }
 237 
 238 func indexPair(s []byte, x byte, y byte) int {
 239     var prev byte
 240 
 241     for i, b := range s {
 242         if prev == x && b == y {
 243             return i
 244         }
 245         prev = b
 246     }
 247 
 248     return -1
 249 }