// File: erase.go

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath erase.go
*/

package main

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"regexp"
)

// info is the help message; its leading newline is skipped when shown.
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

All ANSI-style sequences are removed before trying to match-remove things, to
avoid messing those up. Each regex erases all its occurrences on the current
line in the order given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -b, -buffered    don't flush the output after each line
    -h, -help        show this help message
    -i, -ins         match regexes case-insensitively
`

// main parses the leading options, compiles all regexes given as arguments,
// and filters standard input into standard output. The exit code is 1 when
// any regex is invalid, or when reading the input fails.
func main() {
	args := os.Args[1:]
	buffered := false
	insensitive := false

	// consume all leading options, stopping at the first argument which
	// isn't one of them
options:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true

		default:
			break options
		}

		args = args[1:]
	}

	// an explicit `--` lets regexes which look like options come after it
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	exprs := make([]*regexp.Regexp, 0, len(args))

	for _, s := range args {
		if insensitive {
			s = `(?i)` + s
		}

		exp, err := regexp.Compile(s)
		if err != nil {
			// keep going, so all invalid regexes are reported at once
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		exprs = append(exprs, exp)
	}

	// quit right away when given invalid regexes
	if len(exprs) < len(args) {
		os.Exit(1)
	}

	// flush after each line, unless asked not to, or unless the output is
	// seekable, which means it's a regular file instead of a pipe/terminal
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	err := run(os.Stdout, os.Stdin, exprs, liveLines)
	// io.EOF is how func run signals the output can't be written to anymore,
	// which is a valid reason to quit quietly
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run copies lines from r to w, after removing a leading UTF-8 BOM from the
// first line, all ANSI-style sequences, and all matches of the regexes given,
// which are applied in order. When live is true, the output is flushed after
// each line. The result is io.EOF when the output can't be written to, and
// the scanner's error (if any) otherwise.
func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
	var buf []byte
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	// each regex reads from the result of the previous one, so 2 reusable
	// buffers taking turns are enough, and nothing needs copying back
	var scratch [2][]byte
	scratch[0] = make([]byte, 0, 8*1024)
	scratch[1] = make([]byte, 0, 8*1024)

	for i := 0; sc.Scan(); i++ {
		line := sc.Bytes()
		if i == 0 && bytes.HasPrefix(line, []byte{0xef, 0xbb, 0xbf}) {
			line = line[3:]
		}

		out := line
		if bytes.IndexByte(out, '\x1b') >= 0 {
			buf = plain(buf[:0], out)
			out = buf
		}

		for k, exp := range exprs {
			which := k & 1
			scratch[which] = erase(scratch[which][:0], out, exp)
			out = scratch[which]
		}

		if _, err := bw.Write(out); err != nil {
			return io.EOF
		}

		if bw.WriteByte('\n') != nil {
			return io.EOF
		}

		if !live {
			continue
		}

		if bw.Flush() != nil {
			return io.EOF
		}
	}

	return sc.Err()
}

// erase appends to dst all bytes from src which aren't part of any match of
// the regex given, and returns the extended slice. Matches are found across
// the whole of src at once, so anchors such as `^` only match where they
// really apply, and empty matches neither erase anything nor prevent later
// non-empty matches from being erased.
func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
	kept := 0
	// matches come in ascending order and never overlap, so it's enough
	// to copy the gaps between them
	for _, span := range with.FindAllIndex(src, -1) {
		dst = append(dst, src[kept:span[0]]...)
		kept = span[1]
	}
	return append(dst, src[kept:]...)
}

// plain appends src to dst without any of its ANSI-style sequences, and
// returns the extended slice. A sequence which never ends is dropped along
// with everything after it.
func plain(dst []byte, src []byte) []byte {
	for len(src) > 0 {
		start, end := indexEscapeSequence(src)
		if start < 0 {
			return append(dst, src...)
		}

		dst = append(dst, src[:start]...)
		if end < 0 {
			return dst
		}

		src = src[end:]
	}

	return dst
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices: the first is where the ESC byte is, and is negative when no
// sequence is found; the second is right past the end of the sequence, and
// is negative when the sequence never ends; given their fairly-common use,
// even the hyperlink ESC]8 sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	for i := 0; i+1 < len(s); i++ {
		if s[i] != '\x1b' {
			continue
		}

		// body is where the payload after the 2-byte intro starts
		body := i + 2

		switch {
		case s[i+1] == '[':
			// these sequences end with their first letter
			j := indexLetter(s[body:])
			if j < 0 {
				return i, -1
			}
			return i, body + j + 1

		case s[i+1] == ']' && body < len(s) && s[body] == '8':
			// these end with ESC\, and indexPair finds the backslash
			j := indexPair(s[body:], '\x1b', '\\')
			if j < 0 {
				return i, -1
			}
			return i, body + j + 1
		}
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, or gives back a
// negative value when there aren't any.
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns lowercase ASCII letters into uppercase ones
		if up := b &^ 32; 'A' <= up && up <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first place where byte x is immediately followed by
// byte y: the result is the index of that y, or a negative value when no
// such pair is found.
func indexPair(s []byte, x byte, y byte) int {
	for i := 1; i < len(s); i++ {
		if s[i-1] == x && s[i] == y {
			return i
		}
	}

	return -1
}