File: hima.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath hima.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 hima [options...] [regexes...]
  45 
  46 
  47 HIlight MAtches ANSI-styles matching regular expressions along lines read
  48 from the standard input. The regular-expression mode used is "re2", which
  49 is a superset of the commonly-used "extended-mode".
  50 
  51 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  52 those up. Also, multiple matches in a line never overlap: at each step
  53 along a line, the earliest-starting match among the regexes always wins,
  54 as the order regexes are given among the arguments never matters.
  55 
  56 The options are, available both in single and double-dash versions
  57 
  58     -h, -help      show this help message
  59     -f, -filter    filter out (ignore) lines with no matches
  60     -i, -ins       match regexes case-insensitively
  61 `
  62 
// highlightStyle is the ANSI escape sequence written right before each
// highlighted match: `ESC[7m` is the SGR code for reverse/inverted video
const highlightStyle = "\x1b[7m"
  64 
  65 func main() {
  66     filter := false
  67     buffered := false
  68     insensitive := false
  69     args := os.Args[1:]
  70 
  71     for len(args) > 0 {
  72         switch args[0] {
  73         case `-b`, `--b`, `-buffered`, `--buffered`:
  74             buffered = true
  75             args = args[1:]
  76             continue
  77 
  78         case `-f`, `--f`, `-filter`, `--filter`:
  79             filter = true
  80             args = args[1:]
  81             continue
  82 
  83         case `-fi`, `--fi`, `-if`, `--if`:
  84             filter = true
  85             insensitive = true
  86             args = args[1:]
  87             continue
  88 
  89         case `-h`, `--h`, `-help`, `--help`:
  90             os.Stdout.WriteString(info[1:])
  91             return
  92 
  93         case `-i`, `--i`, `-ins`, `--ins`:
  94             insensitive = true
  95             args = args[1:]
  96             continue
  97         }
  98 
  99         break
 100     }
 101 
 102     if len(args) > 0 && args[0] == `--` {
 103         args = args[1:]
 104     }
 105 
 106     patterns := make([]pattern, 0, len(args))
 107 
 108     for _, s := range args {
 109         var err error
 110         var pat pattern
 111 
 112         if insensitive {
 113             pat, err = compile(`(?i)` + s)
 114         } else {
 115             pat, err = compile(s)
 116         }
 117 
 118         if err != nil {
 119             os.Stderr.WriteString(err.Error())
 120             os.Stderr.WriteString("\n")
 121             continue
 122         }
 123 
 124         patterns = append(patterns, pat)
 125     }
 126 
 127     // quit right away when given invalid regexes
 128     if len(patterns) < len(args) {
 129         os.Exit(1)
 130     }
 131 
 132     liveLines := !buffered
 133     if !buffered {
 134         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 135             liveLines = false
 136         }
 137     }
 138 
 139     err := run(os.Stdout, os.Stdin, patterns, filter, liveLines)
 140     if err != nil && err != io.EOF {
 141         os.Stderr.WriteString(err.Error())
 142         os.Stderr.WriteString("\n")
 143         os.Exit(1)
 144     }
 145 }
 146 
 147 // pattern is a regular-expression pattern which distinguishes between the
 148 // start/end of a line and those of the chunks it can be used to match
 149 type pattern struct {
 150     // expr is the regular-expression
 151     expr *regexp.Regexp
 152 
 153     // begin is whether the regexp refers to the start of a line
 154     begin bool
 155 
 156     // end is whether the regexp refers to the end of a line
 157     end bool
 158 }
 159 
 160 func compile(src string) (pattern, error) {
 161     expr, err := regexp.Compile(src)
 162 
 163     var pat pattern
 164     pat.expr = expr
 165     pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
 166     pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
 167     return pat, err
 168 }
 169 
 170 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
 171     if i > 0 && p.begin {
 172         return -1, -1
 173     }
 174     if i != last && p.end {
 175         return -1, -1
 176     }
 177 
 178     span := p.expr.FindIndex(s)
 179     // also ignore empty regex matches to avoid infinite outer loops,
 180     // as skipping empty slices isn't advancing at all, leaving the
 181     // string stuck to being empty-matched forever by the same regex
 182     if len(span) != 2 || span[0] == span[1] {
 183         return -1, -1
 184     }
 185 
 186     return span[0], span[1]
 187 }
 188 
 189 func run(w io.Writer, r io.Reader, pats []pattern, filter, live bool) error {
 190     sc := bufio.NewScanner(r)
 191     sc.Buffer(nil, 8*1024*1024*1024)
 192     bw := bufio.NewWriter(w)
 193     defer bw.Flush()
 194 
 195     for i := 0; sc.Scan(); i++ {
 196         s := sc.Bytes()
 197         if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
 198             s = s[3:]
 199         }
 200 
 201         n := 0
 202         last := countChunks(s) - 1
 203         if last < 0 {
 204             last = 0
 205         }
 206 
 207         if filter && !matches(s, pats, last) {
 208             continue
 209         }
 210 
 211         for len(s) > 0 {
 212             i, j := indexEscapeSequence(s)
 213             if i < 0 {
 214                 handleChunk(bw, s, pats, n, last)
 215                 break
 216             }
 217             if j < 0 {
 218                 j = len(s)
 219             }
 220 
 221             handleChunk(bw, s[:i], pats, n, last)
 222             if i > 0 {
 223                 n++
 224             }
 225 
 226             bw.Write(s[i:j])
 227 
 228             s = s[j:]
 229         }
 230 
 231         if bw.WriteByte('\n') != nil {
 232             return io.EOF
 233         }
 234 
 235         if !live {
 236             continue
 237         }
 238 
 239         if bw.Flush() != nil {
 240             return io.EOF
 241         }
 242     }
 243 
 244     return sc.Err()
 245 }
 246 
 247 // matches finds out if any regex matches any substring around ANSI-sequences
 248 func matches(s []byte, patterns []pattern, last int) bool {
 249     n := 0
 250 
 251     for len(s) > 0 {
 252         i, j := indexEscapeSequence(s)
 253         if i < 0 {
 254             for _, p := range patterns {
 255                 if begin, _ := p.findIndex(s, n, last); begin >= 0 {
 256                     return true
 257                 }
 258             }
 259             return false
 260         }
 261 
 262         if j < 0 {
 263             j = len(s)
 264         }
 265 
 266         for _, p := range patterns {
 267             if begin, _ := p.findIndex(s[:i], n, last); begin >= 0 {
 268                 return true
 269             }
 270         }
 271 
 272         if i > 0 {
 273             n++
 274         }
 275 
 276         s = s[j:]
 277     }
 278 
 279     return false
 280 }
 281 
 282 func countChunks(s []byte) int {
 283     chunks := 0
 284 
 285     for len(s) > 0 {
 286         i, j := indexEscapeSequence(s)
 287         if i < 0 {
 288             break
 289         }
 290 
 291         if i > 0 {
 292             chunks++
 293         }
 294 
 295         if j < 0 {
 296             break
 297         }
 298         s = s[j:]
 299     }
 300 
 301     if len(s) > 0 {
 302         chunks++
 303     }
 304     return chunks
 305 }
 306 
 307 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 308 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 309 // indices which can be independently negative when either the start/end of
 310 // a sequence isn't found; given their fairly-common use, even the hyperlink
 311 // ESC]8 sequences are supported
 312 func indexEscapeSequence(s []byte) (int, int) {
 313     var prev byte
 314 
 315     for i, b := range s {
 316         if prev == '\x1b' && b == '[' {
 317             j := indexLetter(s[i+1:])
 318             if j < 0 {
 319                 return i, -1
 320             }
 321             return i - 1, i + 1 + j + 1
 322         }
 323 
 324         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 325             j := indexPair(s[i+1:], '\x1b', '\\')
 326             if j < 0 {
 327                 return i, -1
 328             }
 329             return i - 1, i + 1 + j + 2
 330         }
 331 
 332         prev = b
 333     }
 334 
 335     return -1, -1
 336 }
 337 
 338 func indexLetter(s []byte) int {
 339     for i, b := range s {
 340         upper := b &^ 32
 341         if 'A' <= upper && upper <= 'Z' {
 342             return i
 343         }
 344     }
 345 
 346     return -1
 347 }
 348 
 349 func indexPair(s []byte, x byte, y byte) int {
 350     var prev byte
 351 
 352     for i, b := range s {
 353         if prev == x && b == y && i > 0 {
 354             return i
 355         }
 356         prev = b
 357     }
 358 
 359     return -1
 360 }
 361 
// note: restoring ANSI-styles after style-resets doesn't seem to be worth it,
// judging by the results from a previous version, which used to do that
 364 
 365 // handleChunk handles line-slices around any detected ANSI-style sequences,
 366 // or even whole lines, when no ANSI-styles are found in them
 367 func handleChunk(w *bufio.Writer, s []byte, with []pattern, n int, last int) {
 368     for len(s) > 0 {
 369         start, end := -1, -1
 370         for _, p := range with {
 371             i, j := p.findIndex(s, n, last)
 372             if i >= 0 && (i < start || start < 0) {
 373                 start, end = i, j
 374             }
 375         }
 376 
 377         if start < 0 {
 378             w.Write(s)
 379             return
 380         }
 381 
 382         w.Write(s[:start])
 383         w.WriteString(highlightStyle)
 384         w.Write(s[start:end])
 385         w.WriteString("\x1b[0m")
 386 
 387         s = s[end:]
 388     }
 389 }