File: erase.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath erase.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "bytes" 37 "io" 38 "os" 39 "regexp" 40 ) 41 42 const info = ` 43 erase [options...] [regexes...] 44 45 46 Ignore/remove all occurrences of all regex matches along lines read from the 47 standard input. The regular-expression mode used is "re2", which is a superset 48 of the commonly-used "extended-mode". 49 50 All ANSI-style sequences are removed before trying to match-remove things, to 51 avoid messing those up. Each regex erases all its occurrences on the current 52 line in the order given among the arguments, so regex-order matters. 53 54 The options are, available both in single and double-dash versions 55 56 -h, -help show this help message 57 -i, -ins match regexes case-insensitively 58 ` 59 60 func main() { 61 args := os.Args[1:] 62 buffered := false 63 insensitive := false 64 65 for len(args) > 0 { 66 switch args[0] { 67 case `-b`, `--b`, `-buffered`, `--buffered`: 68 buffered = true 69 args = args[1:] 70 continue 71 72 case `-h`, `--h`, `-help`, `--help`: 73 os.Stdout.WriteString(info[1:]) 74 return 75 76 case `-i`, `--i`, `-ins`, `--ins`: 77 insensitive = true 78 args = args[1:] 79 continue 80 } 81 82 break 83 } 84 85 if len(args) > 0 && args[0] == `--` { 86 args = args[1:] 87 } 88 89 exprs := make([]*regexp.Regexp, 0, len(args)) 90 91 for _, s := range args { 92 var err error 93 var exp *regexp.Regexp 94 95 if insensitive { 96 exp, err = regexp.Compile(`(?i)` + s) 97 } else { 98 exp, err = regexp.Compile(s) 99 } 100 101 if err != nil { 102 os.Stderr.WriteString(err.Error()) 103 os.Stderr.WriteString("\n") 104 continue 105 } 106 107 exprs = append(exprs, exp) 108 } 109 110 // quit right away when given invalid regexes 111 if len(exprs) < len(args) { 112 os.Exit(1) 113 } 114 115 liveLines := !buffered 116 if !buffered { 117 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 118 liveLines = false 119 } 120 } 121 122 err := run(os.Stdout, os.Stdin, exprs, liveLines) 123 if err != nil && err != io.EOF { 124 os.Stderr.WriteString(err.Error()) 125 os.Stderr.WriteString("\n") 126 os.Exit(1) 127 } 128 } 129 130 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error { 131 var buf []byte 132 sc := bufio.NewScanner(r) 133 sc.Buffer(nil, 8*1024*1024*1024) 134 bw := bufio.NewWriter(w) 135 defer bw.Flush() 136 137 src := make([]byte, 8*1024) 138 dst := make([]byte, 8*1024) 139 140 for i := 0; sc.Scan(); i++ { 141 line := sc.Bytes() 142 if i == 0 && bytes.HasPrefix(line, []byte{0xef, 0xbb, 0xbf}) { 143 line = line[3:] 144 } 145 146 s := line 147 if bytes.IndexByte(s, '\x1b') >= 0 { 148 buf = plain(buf[:0], s) 149 s = buf 150 } 151 152 if len(exprs) > 0 { 153 src = append(src[:0], s...) 154 for _, exp := range exprs { 155 dst = erase(dst[:0], src, exp) 156 src = append(src[:0], dst...) 157 } 158 bw.Write(dst) 159 } else { 160 bw.Write(s) 161 } 162 163 if bw.WriteByte('\n') != nil { 164 return io.EOF 165 } 166 167 if !live { 168 continue 169 } 170 171 if bw.Flush() != nil { 172 return io.EOF 173 } 174 } 175 176 return sc.Err() 177 } 178 179 func erase(dst []byte, src []byte, with *regexp.Regexp) []byte { 180 for len(src) > 0 { 181 span := with.FindIndex(src) 182 // also ignore empty regex matches to avoid infinite outer loops, 183 // as skipping empty slices isn't advancing at all, leaving the 184 // string stuck to being empty-matched forever by the same regex 185 if len(span) != 2 || span[0] == span[1] || span[0] < 0 { 186 return append(dst, src...) 187 } 188 189 start, end := span[0], span[1] 190 dst = append(dst, src[:start]...) 191 // avoid infinite loops caused by empty regex matches 192 if start == end && end < len(src) { 193 dst = append(dst, src[end]) 194 end++ 195 } 196 src = src[end:] 197 } 198 199 return dst 200 } 201 202 func plain(dst []byte, src []byte) []byte { 203 for len(src) > 0 { 204 i, j := indexEscapeSequence(src) 205 if i < 0 { 206 dst = append(dst, src...) 207 break 208 } 209 if j < 0 { 210 j = len(src) 211 } 212 213 if i > 0 { 214 dst = append(dst, src[:i]...) 215 } 216 217 src = src[j:] 218 } 219 220 return dst 221 } 222 223 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is 224 // the multi-byte sequences starting with ESC[; the result is a pair of slice 225 // indices which can be independently negative when either the start/end of 226 // a sequence isn't found; given their fairly-common use, even the hyperlink 227 // ESC]8 sequences are supported 228 func indexEscapeSequence(s []byte) (int, int) { 229 var prev byte 230 231 for i, b := range s { 232 if prev == '\x1b' && b == '[' { 233 j := indexLetter(s[i+1:]) 234 if j < 0 { 235 return i, -1 236 } 237 return i - 1, i + 1 + j + 1 238 } 239 240 if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' { 241 j := indexPair(s[i+1:], '\x1b', '\\') 242 if j < 0 { 243 return i, -1 244 } 245 return i - 1, i + 1 + j + 2 246 } 247 248 prev = b 249 } 250 251 return -1, -1 252 } 253 254 func indexLetter(s []byte) int { 255 for i, b := range s { 256 upper := b &^ 32 257 if 'A' <= upper && upper <= 'Z' { 258 return i 259 } 260 } 261 262 return -1 263 } 264 265 func indexPair(s []byte, x byte, y byte) int { 266 var prev byte 267 268 for i, b := range s { 269 if prev == x && b == y && i > 0 { 270 return i 271 } 272 prev = b 273 } 274 275 return -1 276 }