// File: erase.go

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath erase.go
*/

package main

import (
	"bufio"
	"io"
	"os"
	"regexp"
)

// info is the help message; its leading newline is skipped when shown
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing those
up. Each regex erases all its occurrences on the current line in the order
given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -b          buffer output, instead of flushing it after each line
    -buffered   buffer output, instead of flushing it after each line

    -h          show this help message
    -help       show this help message

    -i          match regexes case-insensitively
    -ins        match regexes case-insensitively
`

// main handles the leading options, compiles all regexes given, and then
// filters standard input into standard output
func main() {
	args := os.Args[1:]
	buffered := false
	insensitive := false

	// leading options are consumed until the first non-option argument
out:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true
			args = args[1:]

		default:
			break out
		}
	}

	// a leading `--` explicitly ends the options
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	exprs := make([]*regexp.Regexp, 0, len(args))

	// keep going after a bad regex, so all invalid ones are reported at once
	for _, s := range args {
		var err error
		var exp *regexp.Regexp

		if insensitive {
			exp, err = regexp.Compile(`(?i)` + s)
		} else {
			exp, err = regexp.Compile(s)
		}

		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		exprs = append(exprs, exp)
	}

	// quit right away when given invalid regexes
	if len(exprs) < len(args) {
		os.Exit(1)
	}

	// lines are flushed one by one, unless buffering was asked for, or the
	// standard output can be seeked, which presumably means it's a file
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// maxLineBytes gives the max line-length the line-scanner is allowed to
// handle: the intended limit is 8 GiB, which is clamped to the largest int
// on platforms where that value doesn't fit, so the code compiles there too
func maxLineBytes() int {
	const maxInt = int(^uint(0) >> 1)

	limit := uint64(8) << 30
	if limit > uint64(maxInt) {
		return maxInt
	}
	return int(limit)
}

// run copies lines from r into w, erasing all matches of all the regexes
// given from each line, in order; a leading UTF-8 BOM on the first line is
// dropped; when live is true, the output is flushed after each line; write
// errors end the run quietly (nil result), presumably since those mean the
// output side was closed early; the only errors returned are from reading
func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, maxLineBytes())
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	// cur holds the line as transformed so far, while next is scratch space
	// for the following regex: both buffers are reused across lines
	var cur []byte
	var next []byte

	for i := 0; sc.Scan(); i++ {
		s := sc.Bytes()
		if i == 0 && len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf {
			s = s[3:]
		}

		cur = append(cur[:0], s...)
		for _, e := range exprs {
			next = erase(next[:0], cur, e)
			// swapping avoids copying the result back each time
			cur, next = next, cur
		}

		// cur is the right thing to emit even when no regexes are given,
		// in which case lines pass through unchanged; write errors are
		// sticky in a bufio.Writer, so the check right after suffices
		bw.Write(cur)

		if err := bw.WriteByte('\n'); err != nil {
			return nil
		}

		if !live {
			continue
		}

		if err := bw.Flush(); err != nil {
			return nil
		}
	}

	return sc.Err()
}

// erase appends src to dst, leaving out all matches of the regex given;
// ANSI-style sequences are always copied as they are, and are never part of
// what the regex sees: the text chunks between them are matched separately
func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
	for len(src) > 0 {
		i, j := indexEscapeSequence(src)
		if i < 0 {
			dst = handleLineChunk(dst, src, with)
			break
		}
		// unterminated sequences run until the end of the line
		if j < 0 {
			j = len(src)
		}

		dst = handleLineChunk(dst, src[:i], with)
		dst = append(dst, src[i:j]...)
		src = src[j:]
	}

	return dst
}

// handleLineChunk appends src to dst, leaving out all matches of the regex
// given; src is supposed to have no ANSI-style sequences in it; all matches
// are found in a single pass over the whole chunk, so anchors and word
// boundaries aren't wrongly re-evaluated at the start of each leftover part
func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
	prev := 0

	// matches come in order and never overlap; empty matches are harmless,
	// since they just result in copying all bytes around them
	for _, span := range with.FindAllIndex(src, -1) {
		dst = append(dst, src[prev:span[0]]...)
		prev = span[1]
	}

	return append(dst, src[prev:]...)
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices: the first is the index of the ESC byte, or negative when no
// sequence is found; the second is the index right after the sequence, or
// negative when the sequence has no end; given their fairly-common use, even
// the hyperlink ESC]8 sequences are supported, which end with ESC\
func indexEscapeSequence(s []byte) (int, int) {
	for i := 1; i < len(s); i++ {
		if s[i-1] != '\x1b' {
			continue
		}

		switch {
		case s[i] == '[':
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			// j is relative to the bytes after the `[`
			return i - 1, i + 1 + j + 1

		case s[i] == ']' && i+1 < len(s) && s[i+1] == '8':
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			// j is the index of the pair's 2nd byte, relative to the `8`
			return i - 1, i + 1 + j + 1
		}
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, or gives a negative
// value when there isn't any
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y:
// the result is the index of the latter (y), or a negative value when no
// such pair is found
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y {
			return i
		}
		prev = b
	}

	return -1
}