/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath hima.go
*/

package main

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"regexp"
	"strings"
)

const info = `
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is "re2", which
is a superset of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help      show this help message
    -f, -filter    filter out (ignore) lines with no matches
    -i, -ins       match regexes case-insensitively
`

// highlightStyle is the ANSI-style sequence emitted right before each match
const highlightStyle = "\x1b[7m"

// options holds the settings chosen via the leading command-line options
type options struct {
	// filter is whether lines with no matches are left out of the output
	filter bool

	// buffered is whether flushing the output after each line is disabled
	buffered bool

	// insensitive is whether regexes are compiled as case-insensitive
	insensitive bool

	// help is whether the help message was asked for
	help bool
}

// parseOptions consumes all the leading options from the arguments given,
// returning the settings found, along with all arguments left; parsing
// stops at the first argument which isn't a known option, or right after
// a help option
func parseOptions(args []string) (options, []string) {
	var opts options

	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			opts.buffered = true

		case `-f`, `--f`, `-filter`, `--filter`:
			opts.filter = true

		case `-h`, `--h`, `-help`, `--help`:
			opts.help = true
			return opts, args[1:]

		case `-i`, `--i`, `-ins`, `--ins`:
			opts.insensitive = true

		default:
			// first non-option argument: all the rest are regexes
			return opts, args
		}

		// keep going, so that any number of leading options is handled
		args = args[1:]
	}

	return opts, args
}

func main() {
	opts, args := parseOptions(os.Args[1:])
	if opts.help {
		// skip the leading newline in the help message
		os.Stdout.WriteString(info[1:])
		return
	}

	// an explicit `--` separates options from regexes
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	patterns := make([]pattern, 0, len(args))

	for _, s := range args {
		if opts.insensitive {
			s = `(?i)` + s
		}

		pat, err := compile(s)
		if err != nil {
			// keep going, to show the errors for all invalid regexes
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		patterns = append(patterns, pat)
	}

	// quit right away when given invalid regexes
	if len(patterns) < len(args) {
		os.Exit(1)
	}

	// lines are flushed as they come, unless buffering was asked for, or
	// the standard output is seekable
	liveLines := !opts.buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	err := run(os.Stdout, os.Stdin, patterns, opts.filter, liveLines)
	// io.EOF is how func run signals the output can't be written to anymore,
	// which is a reason to quit quietly
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// pattern is a regular-expression pattern which distinguishes between the
// start/end of a line and those of the chunks it can be used to match
type pattern struct {
	// expr is the regular-expression
	expr *regexp.Regexp

	// begin is whether the regexp refers to the start of a line
	begin bool

	// end is whether the regexp refers to the end of a line
	end bool
}

// compile turns regex source-code into a pattern, also detecting whether
// the regex is anchored to the start and/or the end of a line; when the
// regex is invalid, the result is an empty pattern along with the error
func compile(src string) (pattern, error) {
	expr, err := regexp.Compile(src)
	if err != nil {
		return pattern{}, err
	}

	var pat pattern
	pat.expr = expr
	pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
	// a trailing `\$` is an escaped dollar sign, not an end-of-line anchor
	pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
	return pat, nil
}

// findIndex finds the first non-empty match in the chunk given, where i is
// the index of the chunk in its line, and last is the index of the final
// chunk in that same line; both results are negative when nothing matches
func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
	// line-start patterns can only match the first chunk in a line
	if i > 0 && p.begin {
		return -1, -1
	}
	// line-end patterns can only match the last chunk in a line
	if i != last && p.end {
		return -1, -1
	}

	span := p.expr.FindIndex(s)
	// also ignore empty regex matches to avoid infinite outer loops,
	// as skipping empty slices isn't advancing at all, leaving the
	// string stuck to being empty-matched forever by the same regex
	if len(span) != 2 || span[0] == span[1] {
		return -1, -1
	}

	return span[0], span[1]
}

// run reads lines from r and writes them to w, ANSI-styling all the matches
// found in them: filter is whether to skip lines with no matches, and live
// is whether to flush the output after each line; the result is io.EOF when
// the output can't be written to anymore, or any error from reading input
func run(w io.Writer, r io.Reader, pats []pattern, filter, live bool) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	for line := 0; sc.Scan(); line++ {
		s := sc.Bytes()
		// ignore a leading UTF-8 BOM on the first line
		if line == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
			s = s[3:]
		}

		// n counts the non-empty chunks handled so far on the current line
		n := 0
		last := countChunks(s) - 1
		if last < 0 {
			last = 0
		}

		if filter && !matches(s, pats, last) {
			continue
		}

		for len(s) > 0 {
			i, j := indexEscapeSequence(s)
			if i < 0 {
				// no more ANSI-sequences: the rest of the line is text
				handleChunk(bw, s, pats, n, last)
				break
			}
			if j < 0 {
				// unterminated sequence: it takes the rest of the line
				j = len(s)
			}

			handleChunk(bw, s[:i], pats, n, last)
			if i > 0 {
				n++
			}

			// ANSI-sequences are emitted as given
			bw.Write(s[i:j])

			s = s[j:]
		}

		// buffered writers remember their first error, so checking this
		// write also covers all the previous writes for the line
		if bw.WriteByte('\n') != nil {
			return io.EOF
		}

		if !live {
			continue
		}

		if bw.Flush() != nil {
			return io.EOF
		}
	}

	return sc.Err()
}

// matches finds out if any regex matches any substring around ANSI-sequences
func matches(s []byte, patterns []pattern, last int) bool {
	n := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			for _, p := range patterns {
				if begin, _ := p.findIndex(s, n, last); begin >= 0 {
					return true
				}
			}
			return false
		}

		if j < 0 {
			j = len(s)
		}

		for _, p := range patterns {
			if begin, _ := p.findIndex(s[:i], n, last); begin >= 0 {
				return true
			}
		}

		if i > 0 {
			n++
		}

		s = s[j:]
	}

	return false
}

// countChunks counts the non-empty text chunks in a line, which are the
// stretches of bytes before/between/after its ANSI-style sequences
func countChunks(s []byte) int {
	chunks := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			// the rest of the line is a final text chunk
			chunks++
			break
		}

		if i > 0 {
			chunks++
		}

		if j < 0 {
			// an unterminated sequence takes the rest of the line, so
			// there's no text chunk after it
			break
		}
		s = s[j:]
	}

	return chunks
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices: both are negative when no sequence is found, while only the end
// is negative when a sequence starts but never ends; given their
// fairly-common use, even the hyperlink ESC]8 sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		if prev == '\x1b' && b == '[' {
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			// the sequence ends right after its final letter
			return i - 1, i + 1 + j + 1
		}

		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			// the sequence ends right after its 2-byte terminator
			return i - 1, i + 1 + j + 2
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, or a negative
// value when there are no letters
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the index where byte x is immediately followed by byte y,
// or a negative value when that pair isn't found
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if i > 0 && prev == x && b == y {
			return i - 1
		}
		prev = b
	}

	return -1
}

// note: looking at the results of restoring ANSI-styles after style-resets
// doesn't seem to be worth it, as a previous version used to do

// handleChunk handles line-slices around any detected ANSI-style sequences,
// or even whole lines, when no ANSI-styles are found in them
func handleChunk(w *bufio.Writer, s []byte, with []pattern, n int, last int) {
	for len(s) > 0 {
		// pick the earliest-starting match among all patterns
		start, end := -1, -1
		for _, p := range with {
			i, j := p.findIndex(s, n, last)
			if i >= 0 && (i < start || start < 0) {
				start, end = i, j
			}
		}

		if start < 0 {
			w.Write(s)
			return
		}

		w.Write(s[:start])
		w.WriteString(highlightStyle)
		w.Write(s[start:end])
		w.WriteString("\x1b[0m")

		s = s[end:]
	}
}