// File: erase.go

/*
The MIT License (MIT)

Copyright © 2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath erase.go
*/

package main

import (
	"bufio"
	"io"
	"os"
	"regexp"
	"unicode/utf8"
)

// info is the help message; its leading newline is skipped when shown
const info = `
erase [options...] [regexes...]


Ignore/remove all occurrences of all regex matches along lines read from the
standard input. The regular-expression mode used is "re2", which is a superset
of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing those
up. Each regex erases all its occurrences on the current line in the order
given among the arguments, so regex-order matters.

The options are, available both in single and double-dash versions

    -h       show this help message
    -help    show this help message

    -i       match regexes case-insensitively
    -ins     match regexes case-insensitively
`

// main handles the leading option (if any), compiles all regexes given as
// arguments, and filters standard input into standard output; the app fails
// with exit-code 1 when any regex is invalid, or when reading input fails
func main() {
	insensitive := false
	args := os.Args[1:]

	// only the first argument can be an option
	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true
			args = args[1:]

		case `--`:
			args = args[1:]
		}
	}

	// if len(args) == 0 {
	// 	args = []string{`[^\r]`}
	// }

	exprs := make([]*regexp.Regexp, 0, len(args))

	// keep compiling after a failure, so all invalid regexes are reported
	for _, s := range args {
		var err error
		var exp *regexp.Regexp

		if insensitive {
			exp, err = regexp.Compile(`(?i)` + s)
		} else {
			exp, err = regexp.Compile(s)
		}

		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		exprs = append(exprs, exp)
	}

	// quit right away when given invalid regexes
	if len(exprs) < len(args) {
		os.Exit(1)
	}

	// output is flushed after each line, unless stdout is seekable, which
	// presumably means it's a regular file, where buffering is preferable
	liveLines := true
	if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
		liveLines = false
	}

	if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run reads lines from r, erases all matches of each regex from them, in the
// order given, and writes the resulting lines to w, each ending with a line
// feed; lines pass through unchanged when no regexes are given; when live is
// true, the output is flushed after each line; the result is the reading
// error, if any: failures to write quit early and quietly, without an error
func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
	sc := bufio.NewScanner(r)
	// allow lines up to 8 GiB
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	// line and scratch are buffers reused across lines: each regex reads
	// from line and writes into scratch, then the 2 buffers swap roles
	var line []byte
	var scratch []byte

	for sc.Scan() {
		line = append(line[:0], sc.Bytes()...)
		for _, e := range exprs {
			scratch = erase(scratch[:0], line, e)
			line, scratch = scratch, line
		}

		// failed writes quit without an error: presumably that's deliberate,
		// so that closed pipes end the app quietly
		if _, err := bw.Write(line); err != nil {
			return nil
		}

		if err := bw.WriteByte('\n'); err != nil {
			return nil
		}

		if !live {
			continue
		}

		if err := bw.Flush(); err != nil {
			return nil
		}
	}

	return sc.Err()
}

// erase appends src to dst, leaving out all matches of the regex given: any
// ANSI-style sequences in src are copied as they are, and are never matched
// against; the result is the extended dst, which must not share memory with
// src
func erase(dst []byte, src []byte, with *regexp.Regexp) []byte {
	for len(src) > 0 {
		i, j := indexEscapeSequence(src)
		if i < 0 {
			// no more sequences: all that's left is plain text
			dst = handleLineChunk(dst, src, with)
			break
		}
		if j < 0 {
			// an unterminated sequence extends until the end of the line
			j = len(src)
		}

		dst = handleLineChunk(dst, src[:i], with)
		dst = append(dst, src[i:j]...)
		src = src[j:]
	}

	return dst
}

// handleLineChunk appends src to dst, leaving out all matches of the regex
// given; each search starts anew from what's left of src after the previous
// match, so anchors like ^ can match again there; the result is the extended
// dst
func handleLineChunk(dst []byte, src []byte, with *regexp.Regexp) []byte {
	for len(src) > 0 {
		span := with.FindIndex(src)
		if span == nil {
			return append(dst, src...)
		}

		start, end := span[0], span[1]
		dst = append(dst, src[:start]...)
		src = src[end:]

		// an empty match erases nothing and doesn't advance the search:
		// keep the next rune as it is and move past it, so the loop can't
		// go on forever
		if start == end && len(src) > 0 {
			_, size := utf8.DecodeRune(src)
			dst = append(dst, src[:size]...)
			src = src[size:]
		}
	}

	return dst
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices: the first is the index of the ESC byte, or negative when no
// sequence is found, while the second is the index right after the sequence,
// or negative when its end isn't found; given their fairly-common use, even
// the hyperlink ESC]8 sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		// ESC[ sequences end with their first ASCII letter
		if prev == '\x1b' && b == '[' {
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		// ESC]8 sequences end with the byte-pair ESC\
		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			// j is the index of the final backslash, relative to s[i+1:]
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter in the slice given,
// or a negative value when there's none
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y:
// the result is the index of that y, or a negative value when no such pair
// is found
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y {
			return i
		}
		prev = b
	}

	return -1
}