/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath hima.go
*/

package main

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"regexp"
	"strings"
)

// info is the help message; its leading line-feed is skipped when shown
const info = `
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is "re2", which
is a superset of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help      show this help message
    -f, -filter    filter out (ignore) lines with no matches
    -i, -ins       match regexes case-insensitively
`

const (
	// highlightStyle is the ANSI-style emitted right before each match
	highlightStyle = "\x1b[7m"

	// resetStyle is the ANSI-style emitted right after each match
	resetStyle = "\x1b[0m"
)

// main parses the leading options, compiles all regexes given, and then
// highlights matches along all lines read from the standard input
func main() {
	filter := false
	buffered := false
	insensitive := false
	args := os.Args[1:]

	// options are only recognized before the first non-option argument
options:
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true

		case `-f`, `--f`, `-filter`, `--filter`:
			filter = true

		case `-fi`, `--fi`, `-if`, `--if`:
			filter = true
			insensitive = true

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true

		default:
			break options
		}

		args = args[1:]
	}

	// an explicit `--` allows regexes which look like options
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	patterns := make([]pattern, 0, len(args))

	for _, src := range args {
		if insensitive {
			src = `(?i)` + src
		}

		pat, err := compile(src)
		if err != nil {
			// keep going, so all invalid regexes are reported at once
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		patterns = append(patterns, pat)
	}

	// quit right away when given invalid regexes
	if len(patterns) < len(args) {
		os.Exit(1)
	}

	// lines are flushed one at a time, unless buffering was asked for, or
	// the standard output is seekable, which pipes and terminals aren't
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// run uses io.EOF to signal a failed write: quit quietly in that case
	err := run(os.Stdout, os.Stdin, patterns, filter, liveLines)
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// pattern is a regular-expression pattern which distinguishes between the
// start/end of a line and those of the chunks it can be used to match
type pattern struct {
	// expr is the regular-expression
	expr *regexp.Regexp

	// begin is whether the regexp refers to the start of a line
	begin bool

	// end is whether the regexp refers to the end of a line
	end bool
}

// compile turns regex source-code into a pattern, also detecting leading
// `^` and trailing unescaped `$` anchors; when the regex is invalid, the
// result is a zero-value pattern along with the compilation error
func compile(src string) (pattern, error) {
	expr, err := regexp.Compile(src)
	if err != nil {
		return pattern{}, err
	}

	return pattern{
		expr:  expr,
		begin: strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`),
		end:   strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`),
	}, nil
}

// findIndex finds the first non-empty match in chunk s, which is the i-th
// text chunk of its line, last being the index of the final one; the result
// is a pair of slice indices, both negative when there's no valid match
func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
	// line-start patterns can only match the first chunk of a line
	if i > 0 && p.begin {
		return -1, -1
	}
	// line-end patterns can only match the last chunk of a line
	if i != last && p.end {
		return -1, -1
	}

	span := p.expr.FindIndex(s)
	// also ignore empty regex matches to avoid infinite outer loops,
	// as skipping empty slices isn't advancing at all, leaving the
	// string stuck to being empty-matched forever by the same regex
	if len(span) != 2 || span[0] == span[1] {
		return -1, -1
	}

	return span[0], span[1]
}

// run highlights matches along all lines read from r, writing results to w:
// filter ignores lines with no matches, while live flushes the output after
// each line; failed writes result in io.EOF, so callers can quit quietly,
// while any other non-nil result is an input error
func run(w io.Writer, r io.Reader, pats []pattern, filter, live bool) error {
	sc := bufio.NewScanner(r)
	// allow lines up to 8 GiB: note this constant needs ints to be 64-bit
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	for lineNum := 0; sc.Scan(); lineNum++ {
		line := sc.Bytes()
		if lineNum == 0 {
			// ignore a leading UTF-8 BOM on the first line
			line = bytes.TrimPrefix(line, []byte{0xef, 0xbb, 0xbf})
		}

		last := countChunks(line) - 1
		if last < 0 {
			last = 0
		}

		if filter && !matches(line, pats, last) {
			continue
		}

		// n is the index of the current text chunk
		n := 0

		for len(line) > 0 {
			i, j := indexEscapeSequence(line)
			if i < 0 {
				handleChunk(bw, line, pats, n, last)
				break
			}
			if j < 0 {
				// unterminated sequences run until the end of the line
				j = len(line)
			}

			handleChunk(bw, line[:i], pats, n, last)
			if i > 0 {
				n++
			}

			// emit escape sequences as they are; any write error sticks to
			// the buffered writer, and is noticed by the checks below
			bw.Write(line[i:j])

			line = line[j:]
		}

		if bw.WriteByte('\n') != nil {
			return io.EOF
		}

		if live && bw.Flush() != nil {
			return io.EOF
		}
	}

	return sc.Err()
}

// anyMatch checks if any pattern matches the text chunk given, which is
// the n-th one of its line, last being the index of the final one
func anyMatch(chunk []byte, patterns []pattern, n int, last int) bool {
	for _, p := range patterns {
		if start, _ := p.findIndex(chunk, n, last); start >= 0 {
			return true
		}
	}
	return false
}

// matches finds out if any regex matches any substring around ANSI-sequences
func matches(s []byte, patterns []pattern, last int) bool {
	n := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			return anyMatch(s, patterns, n, last)
		}

		if anyMatch(s[:i], patterns, n, last) {
			return true
		}

		if j < 0 {
			// unterminated sequences leave no text after them
			return false
		}

		if i > 0 {
			n++
		}

		s = s[j:]
	}

	return false
}

// countChunks counts the non-empty text chunks around the ANSI-sequences
// in the line given; unterminated sequences aren't counted as text
func countChunks(s []byte) int {
	chunks := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			// all which remains is plain text
			return chunks + 1
		}

		if i > 0 {
			chunks++
		}

		if j < 0 {
			// the rest of the line is an unterminated sequence
			return chunks
		}
		s = s[j:]
	}

	return chunks
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices, the second of which is negative when the end of the sequence
// isn't found, while both are negative when no sequence starts at all; given
// their fairly-common use, even the hyperlink ESC]8 sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		if prev == '\x1b' && b == '[' {
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			// j already refers to the last byte of the ESC\ terminator
			return i - 1, i + 1 + j + 1
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, if any
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y:
// the result is the index of that y, or a negative value on failure
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y && i > 0 {
			return i
		}
		prev = b
	}

	return -1
}

// note: looking at the results of restoring ANSI-styles after style-resets
// doesn't seem to be worth it, as a previous version used to do

// handleChunk handles line-slices around any detected ANSI-style sequences,
// or even whole lines, when no ANSI-styles are found in them
func handleChunk(w *bufio.Writer, s []byte, with []pattern, n int, last int) {
	// advanced is whether s no longer starts where the chunk started
	advanced := false

	for len(s) > 0 {
		start, end := -1, -1
		for _, p := range with {
			// past the first match, the start of s isn't the start of the
			// line anymore, even if regexes can't tell on their own
			if advanced && p.begin {
				continue
			}

			i, j := p.findIndex(s, n, last)
			if i >= 0 && (i < start || start < 0) {
				start, end = i, j
			}
		}

		if start < 0 {
			w.Write(s)
			return
		}

		w.Write(s[:start])
		w.WriteString(highlightStyle)
		w.Write(s[start:end])
		w.WriteString(resetStyle)

		// matches are never empty, so this always makes progress
		s = s[end:]
		advanced = true
	}
}