// File: erase.go

/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

	go build -ldflags "-s -w" -trimpath erase.go
*/

package main

import (
	"bufio"
	"io"
	"os"
	"regexp"
)

// info is the help message; its leading newline is skipped when printed.
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing those
up. Each regex erases all its occurrences on the current line in the order
given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -h       show this help message
    -help    show this help message

    -i       match regexes case-insensitively
    -ins     match regexes case-insensitively
`

// main handles the optional leading option, compiles all remaining arguments
// as regexes, then filters standard input into standard output. Only the
// first argument is ever checked for being an option.
func main() {
	args := os.Args[1:]
	insensitive := false

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true
			args = args[1:]

		case `--`:
			args = args[1:]
		}
	}

	exprs := make([]*regexp.Regexp, 0, len(args))

	for _, s := range args {
		var err error
		var exp *regexp.Regexp

		if insensitive {
			exp, err = regexp.Compile(`(?i)` + s)
		} else {
			exp, err = regexp.Compile(s)
		}

		// keep going on errors, so all invalid regexes are reported at once
		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		exprs = append(exprs, exp)
	}

	// quit right away when given invalid regexes
	if len(exprs) < len(args) {
		os.Exit(1)
	}

	// flush after each line, unless stdout is seekable; presumably seeking
	// only succeeds when the output is redirected to a regular file
	liveLines := true
	if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
		liveLines = false
	}

	if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run copies lines from r to w, after erasing from each line all matches of
// all the regexes given, which are applied in order: when no regexes are
// given, lines are copied as they are. Every line emitted ends with a line
// feed. When live is true, the output is flushed after each line.
//
// The result is the scanner's error, if any: failures to write the output
// end the run early with a nil error, presumably to quit quietly when the
// output side is closed.
func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
	sc := bufio.NewScanner(r)
	// allow lines up to 8 GiB, way beyond the scanner's default limit
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	// src and dst are separate buffers reused across lines
	var src []byte
	var dst []byte

	for sc.Scan() {
		src = append(src[:0], sc.Bytes()...)
		for _, e := range exprs {
			dst = erase(dst[:0], src, e)
			// the latest result is the input for the next regex: swapping
			// the buffers avoids copying bytes back
			src, dst = dst, src
		}

		// src always has the final result, even when no regexes are given;
		// any write error is sticky, and is checked right below
		bw.Write(src)

		if err := bw.WriteByte('\n'); err != nil {
			return nil
		}

		if !live {
			continue
		}

		if err := bw.Flush(); err != nil {
			return nil
		}
	}

	return sc.Err()
}

// erase appends src to dst, except for all matches of the regex given; the
// ANSI-style sequences found in src are always copied as they are, and the
// regex is only matched against the chunks of bytes around them. The result
// is the extended dst slice.
func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
	for len(src) > 0 {
		i, j := indexEscapeSequence(src)
		if i < 0 {
			// no sequences left: the rest of the line is a plain chunk
			dst = handleLineChunk(dst, src, with)
			break
		}
		if j < 0 {
			// unterminated sequence: shield all remaining bytes
			j = len(src)
		}

		dst = handleLineChunk(dst, src[:i], with)
		dst = append(dst, src[i:j]...)
		src = src[j:]
	}

	return dst
}

// handleLineChunk appends src to dst, leaving out all matches of the regex
// given, and returns the extended dst slice. Matching restarts on what's
// left of src after each match, so anchors/boundaries in the regex are
// evaluated relative to that remainder.
func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
	for len(src) > 0 {
		span := with.FindIndex(src)
		if span == nil {
			return append(dst, src...)
		}

		start := span[0]
		end := span[1]

		dst = append(dst, src[:start]...)
		// avoid infinite loops caused by empty regex matches
		if start == end && end < len(src) {
			dst = append(dst, src[end])
			end++
		}
		src = src[end:]
	}

	return dst
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; given their fairly-common
// use, even the hyperlink ESC]8 sequences are supported.
//
// The result is a pair of slice indices, so that s[start:end] is the whole
// sequence: the start is negative when no sequence is found, in which case
// the end is negative too; the end alone is negative when a sequence starts
// but never ends. A non-negative start is always the index of the ESC byte.
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		// prev can only be ESC when i > 0, so i-1 is always a valid index
		if prev == '\x1b' && b == '[' {
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			// j is relative to s[i+1:], and the end index is exclusive
			return i - 1, i + 1 + j + 1
		}

		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			// j is the index of the final backslash, relative to s[i+1:],
			// so only 1 more is needed to make the end index exclusive
			return i - 1, i + 1 + j + 1
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter in the slice given,
// or returns a negative value when there are none.
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y:
// the result is the index of that y byte, or a negative value when the pair
// isn't found.
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y {
			return i
		}
		prev = b
	}

	return -1
}