File: erase.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath erase.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "bytes" 37 "io" 38 "os" 39 "regexp" 40 "strings" 41 ) 42 43 const info = ` 44 erase [options...] [regexes...] 45 46 47 Ignore/remove all occurrences of all regex matches along lines read from the 48 standard input. The regular-expression mode used is "re2", which is a superset 49 of the commonly-used "extended-mode". 50 51 Regexes always avoid matching any ANSI-style sequences, to avoid messing those 52 up. Each regex erases all its occurrences on the current line in the order 53 given among the arguments, so regex-order matters. 54 55 The options are, available both in single and double-dash versions 56 57 -h show this help message 58 -help show this help message 59 60 -i match regexes case-insensitively 61 -ins match regexes case-insensitively 62 ` 63 64 func main() { 65 args := os.Args[1:] 66 buffered := false 67 insensitive := false 68 69 for len(args) > 0 { 70 switch args[0] { 71 case `-b`, `--b`, `-buffered`, `--buffered`: 72 buffered = true 73 args = args[1:] 74 75 case `-h`, `--h`, `-help`, `--help`: 76 os.Stdout.WriteString(info[1:]) 77 return 78 79 case `-i`, `--i`, `-ins`, `--ins`: 80 insensitive = true 81 args = args[1:] 82 } 83 84 break 85 } 86 87 if len(args) > 0 && args[0] == `--` { 88 args = args[1:] 89 } 90 91 patterns := make([]pattern, 0, len(args)) 92 93 for _, s := range args { 94 var err error 95 var pat pattern 96 97 if insensitive { 98 pat, err = compile(`(?i)` + s) 99 } else { 100 pat, err = compile(s) 101 } 102 103 if err != nil { 104 os.Stderr.WriteString(err.Error()) 105 os.Stderr.WriteString("\n") 106 continue 107 } 108 109 patterns = append(patterns, pat) 110 } 111 112 // quit right away when given invalid regexes 113 if len(patterns) < len(args) { 114 os.Exit(1) 115 } 116 117 liveLines := !buffered 118 if !buffered { 119 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 120 liveLines = false 121 } 122 } 123 124 if err := run(os.Stdout, os.Stdin, patterns, liveLines); err != nil && err != io.EOF { 125 os.Stderr.WriteString(err.Error()) 126 os.Stderr.WriteString("\n") 127 os.Exit(1) 128 } 129 } 130 131 // pattern is a regular-expression pattern which distinguishes between the 132 // start/end of a line and those of the chunks it can be used to match 133 type pattern struct { 134 // expr is the regular-expression 135 expr *regexp.Regexp 136 137 // begin is whether the regexp refers to the start of a line 138 begin bool 139 140 // end is whether the regexp refers to the end of a line 141 end bool 142 } 143 144 func compile(src string) (pattern, error) { 145 expr, err := regexp.Compile(src) 146 147 var pat pattern 148 pat.expr = expr 149 pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`) 150 pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`) 151 return pat, err 152 } 153 154 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) { 155 if i > 0 && p.begin { 156 return -1, -1 157 } 158 if i != last && p.end { 159 return -1, -1 160 } 161 162 span := p.expr.FindIndex(s) 163 // also ignore empty regex matches to avoid infinite outer loops, 164 // as skipping empty slices isn't advancing at all, leaving the 165 // string stuck to being empty-matched forever by the same regex 166 if len(span) != 2 || span[0] == span[1] { 167 return -1, -1 168 } 169 170 return span[0], span[1] 171 } 172 173 func run(w io.Writer, r io.Reader, patterns []pattern, live bool) error { 174 sc := bufio.NewScanner(r) 175 sc.Buffer(nil, 8*1024*1024*1024) 176 bw := bufio.NewWriter(w) 177 defer bw.Flush() 178 179 src := make([]byte, 8*1024) 180 dst := make([]byte, 8*1024) 181 182 for i := 0; sc.Scan(); i++ { 183 s := sc.Bytes() 184 185 if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) { 186 s = s[3:] 187 } 188 189 if len(patterns) > 0 { 190 src = append(src[:0], s...) 191 for _, p := range patterns { 192 dst = erase(dst[:0], src, p) 193 src = append(src[:0], dst...) 194 } 195 bw.Write(dst) 196 } else { 197 bw.Write(s) 198 } 199 200 if bw.WriteByte('\n') != nil { 201 return io.EOF 202 } 203 204 if !live { 205 continue 206 } 207 208 if bw.Flush() != nil { 209 return io.EOF 210 } 211 } 212 213 return sc.Err() 214 } 215 216 func erase(dst []byte, src []byte, with pattern) []byte { 217 n := 0 218 last := countChunks(src) - 1 219 if last < 0 { 220 last = 0 221 } 222 223 for len(src) > 0 { 224 i, j := indexEscapeSequence(src) 225 if i < 0 { 226 dst = handleChunk(dst, src, with, n, last) 227 break 228 } 229 if j < 0 { 230 j = len(src) 231 } 232 233 dst = handleChunk(dst, src[:i], with, n, last) 234 dst = append(dst, src[i:j]...) 235 if i > 0 { 236 n++ 237 } 238 src = src[j:] 239 } 240 241 return dst 242 } 243 244 func countChunks(s []byte) int { 245 chunks := 0 246 247 for len(s) > 0 { 248 i, j := indexEscapeSequence(s) 249 if i < 0 { 250 break 251 } 252 253 if i > 0 { 254 chunks++ 255 } 256 257 if j < 0 { 258 break 259 } 260 s = s[j:] 261 } 262 263 if len(s) > 0 { 264 chunks++ 265 } 266 return chunks 267 } 268 269 func handleChunk(dst []byte, src []byte, with pattern, n int, last int) []byte { 270 for len(src) > 0 { 271 start, end := with.findIndex(src, n, last) 272 if start < 0 { 273 return append(dst, src...) 274 } 275 276 dst = append(dst, src[:start]...) 277 // avoid infinite loops caused by empty regex matches 278 if start == end && end < len(src) { 279 dst = append(dst, src[end]) 280 end++ 281 } 282 src = src[end:] 283 } 284 285 return dst 286 } 287 288 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is 289 // the multi-byte sequences starting with ESC[; the result is a pair of slice 290 // indices which can be independently negative when either the start/end of 291 // a sequence isn't found; given their fairly-common use, even the hyperlink 292 // ESC]8 sequences are supported 293 func indexEscapeSequence(s []byte) (int, int) { 294 var prev byte 295 296 for i, b := range s { 297 if prev == '\x1b' && b == '[' { 298 j := indexLetter(s[i+1:]) 299 if j < 0 { 300 return i, -1 301 } 302 return i - 1, i + 1 + j + 1 303 } 304 305 if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' { 306 j := indexPair(s[i+1:], '\x1b', '\\') 307 if j < 0 { 308 return i, -1 309 } 310 return i - 1, i + 1 + j + 2 311 } 312 313 prev = b 314 } 315 316 return -1, -1 317 } 318 319 func indexLetter(s []byte) int { 320 for i, b := range s { 321 upper := b &^ 32 322 if 'A' <= upper && upper <= 'Z' { 323 return i 324 } 325 } 326 327 return -1 328 } 329 330 func indexPair(s []byte, x byte, y byte) int { 331 var prev byte 332 333 for i, b := range s { 334 if prev == x && b == y { 335 return i 336 } 337 prev = b 338 } 339 340 return -1 341 }