// File: hima.go

/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath hima.go
*/

package main

import (
	"bufio"
	"bytes"
	"io"
	"os"
	"regexp"
	"strings"
)

// info is the help message; its leading newline is skipped when shown
const info = `
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is "re2", which
is a superset of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h, -help        show this help message
    -b, -buffered    don't flush the output after each line
    -f, -filter      filter out (ignore) lines with no matches
    -i, -ins         match regexes case-insensitively
`

// highlightStyle is the ANSI sequence emitted right before each match
const highlightStyle = "\x1b[7m"

// resetStyle is the ANSI sequence emitted right after each match
const resetStyle = "\x1b[0m"

// main parses the leading options, compiles all remaining arguments as
// regexes, and then highlights matches along the lines from standard input
func main() {
	filter := false
	buffered := false
	insensitive := false
	args := os.Args[1:]

	// consume leading options: each recognized option `continue`s the loop,
	// while the first unrecognized argument falls out of the switch and
	// reaches the `break`, which ends the loop
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
			continue

		case `-f`, `--f`, `-filter`, `--filter`:
			filter = true
			args = args[1:]
			continue

		case `-fi`, `--fi`, `-if`, `--if`:
			filter = true
			insensitive = true
			args = args[1:]
			continue

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return

		case `-i`, `--i`, `-ins`, `--ins`:
			insensitive = true
			args = args[1:]
			continue
		}

		break
	}

	// an explicit `--` ends the options, so regexes can start with a dash
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	patterns := make([]pattern, 0, len(args))

	for _, s := range args {
		var err error
		var pat pattern

		if insensitive {
			pat, err = compile(`(?i)` + s)
		} else {
			pat, err = compile(s)
		}

		// keep going on errors, so all invalid regexes are reported at once
		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			continue
		}

		patterns = append(patterns, pat)
	}

	// quit right away when given invalid regexes
	if len(patterns) < len(args) {
		os.Exit(1)
	}

	// lines are flushed one at a time, unless buffering was asked for or
	// the standard output turns out to be seekable
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	err := run(os.Stdout, os.Stdin, patterns, filter, liveLines)
	if err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// pattern is a regular-expression pattern which distinguishes between the
// start/end of a line and those of the chunks it can be used to match
type pattern struct {
	// expr is the regular-expression
	expr *regexp.Regexp

	// begin is whether the regexp refers to the start of a line
	begin bool

	// end is whether the regexp refers to the end of a line
	end bool
}

// compile turns the regex source given into a pattern, also checking if it
// starts with `^` (possibly after a leading `(?i)`) and/or ends with an
// unescaped `$`; a zero-value pattern comes along with any compilation error
func compile(src string) (pattern, error) {
	expr, err := regexp.Compile(src)
	if err != nil {
		return pattern{}, err
	}

	return pattern{
		expr:  expr,
		begin: strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`),
		end:   strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`),
	}, nil
}

// findIndex finds the first non-empty match in s, which is (the rest of) the
// text chunk with index i in its line, where last is the index of the final
// text chunk in that line; both results are negative when nothing matches
func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
	// line-start regexes can only match in the first chunk of a line
	if i > 0 && p.begin {
		return -1, -1
	}
	// line-end regexes can only match in the last chunk of a line
	if i != last && p.end {
		return -1, -1
	}

	span := p.expr.FindIndex(s)
	// also ignore empty regex matches to avoid infinite outer loops,
	// as skipping empty slices isn't advancing at all, leaving the
	// string stuck to being empty-matched forever by the same regex
	if len(span) != 2 || span[0] == span[1] {
		return -1, -1
	}

	return span[0], span[1]
}

// run highlights matches along all lines read from r, writing results to w:
// filter makes it skip lines with no matches, while live makes it flush the
// output after each line; a failed write results in io.EOF, any other error
// being the one the line-scanner stopped with, if any
func run(w io.Writer, r io.Reader, pats []pattern, filter, live bool) error {
	sc := bufio.NewScanner(r)
	sc.Buffer(nil, 8*1024*1024*1024)
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	for line := 0; sc.Scan(); line++ {
		s := sc.Bytes()
		// ignore a leading UTF-8 byte-order mark on the first line
		if line == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
			s = s[3:]
		}

		// n is the index of the current text chunk in the line
		n := 0
		last := countChunks(s) - 1
		if last < 0 {
			last = 0
		}

		if filter && !matches(s, pats, last) {
			continue
		}

		for len(s) > 0 {
			start, end := indexEscapeSequence(s)
			if start < 0 {
				handleChunk(bw, s, pats, n, last)
				break
			}
			// unterminated sequences reach the end of the line
			if end < 0 {
				end = len(s)
			}

			handleChunk(bw, s[:start], pats, n, last)
			if start > 0 {
				n++
			}

			// ANSI sequences are always emitted as given
			bw.Write(s[start:end])

			s = s[end:]
		}

		if bw.WriteByte('\n') != nil {
			return io.EOF
		}

		if !live {
			continue
		}

		if bw.Flush() != nil {
			return io.EOF
		}
	}

	return sc.Err()
}

// matches finds out if any regex matches any substring around ANSI-sequences
func matches(s []byte, patterns []pattern, last int) bool {
	// n is the index of the current text chunk in the line
	n := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			for _, p := range patterns {
				if begin, _ := p.findIndex(s, n, last); begin >= 0 {
					return true
				}
			}
			return false
		}

		// unterminated sequences reach the end of the line
		if j < 0 {
			j = len(s)
		}

		for _, p := range patterns {
			if begin, _ := p.findIndex(s[:i], n, last); begin >= 0 {
				return true
			}
		}

		if i > 0 {
			n++
		}

		s = s[j:]
	}

	return false
}

// countChunks counts the non-empty text chunks around the ANSI-sequences in
// the line given; an unterminated sequence reaches the end of its line, so
// nothing from its start onward counts as text
func countChunks(s []byte) int {
	chunks := 0

	for len(s) > 0 {
		i, j := indexEscapeSequence(s)
		if i < 0 {
			// all that's left is a final text chunk
			return chunks + 1
		}

		if i > 0 {
			chunks++
		}

		if j < 0 {
			return chunks
		}
		s = s[j:]
	}

	return chunks
}

// indexEscapeSequence finds the first ANSI-style escape-sequence, which is
// the multi-byte sequences starting with ESC[; the result is a pair of slice
// indices: the first one is where the ESC byte is, while the second one is
// right after the end of the sequence; both are negative when no sequence is
// found, while only the second one is negative when a sequence starts but
// never ends; given their fairly-common use, even the hyperlink ESC]8
// sequences are supported
func indexEscapeSequence(s []byte) (int, int) {
	var prev byte

	for i, b := range s {
		// the ESC byte is at i-1 in both kinds of sequences checked below

		if prev == '\x1b' && b == '[' {
			// these sequences end with their first ASCII letter
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
			// hyperlink sequences end with the byte-pair ESC and backslash:
			// indexPair finds where the backslash is, so the sequence ends
			// right after that
			j := indexPair(s[i+1:], '\x1b', '\\')
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1
		}

		prev = b
	}

	return -1, -1
}

// indexLetter finds the index of the first ASCII letter, or returns a
// negative value when there are none
func indexLetter(s []byte) int {
	for i, b := range s {
		// clearing bit 5 turns ASCII lowercase letters into uppercase ones
		upper := b &^ 32
		if 'A' <= upper && upper <= 'Z' {
			return i
		}
	}

	return -1
}

// indexPair finds the first time byte x is immediately followed by byte y,
// returning the index of the latter, or a negative value on failure
func indexPair(s []byte, x byte, y byte) int {
	var prev byte

	for i, b := range s {
		if prev == x && b == y && i > 0 {
			return i
		}
		prev = b
	}

	return -1
}

// note: looking at the results of restoring ANSI-styles after style-resets
// doesn't seem to be worth it, as a previous version used to do

// handleChunk handles line-slices around any detected ANSI-style sequences,
// or even whole lines, when no ANSI-styles are found in them: n is the index
// of the chunk given in its line, while last is the index of the final chunk
// in that line
func handleChunk(w *bufio.Writer, s []byte, with []pattern, n int, last int) {
	// atStart is only true before anything from the chunk is emitted: any
	// later iteration starts after a match, where line-start regexes must
	// not match again, as the rest of the chunk isn't the start of a line
	for atStart := true; len(s) > 0; atStart = false {
		// the earliest-starting match among all regexes wins
		start, end := -1, -1
		for _, p := range with {
			if p.begin && !atStart {
				continue
			}

			i, j := p.findIndex(s, n, last)
			if i >= 0 && (i < start || start < 0) {
				start, end = i, j
			}
		}

		if start < 0 {
			w.Write(s)
			return
		}

		w.Write(s[:start])
		w.WriteString(highlightStyle)
		w.Write(s[start:end])
		w.WriteString(resetStyle)

		s = s[end:]
	}
}