File: hima.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath hima.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 hima [options...] [regexes...]
  45 
  46 
  47 HIlight MAtches ANSI-styles matching regular expressions along lines read
  48 from the standard input. The regular-expression mode used is "re2", which
  49 is a superset of the commonly-used "extended-mode".
  50 
  51 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  52 those up. Also, multiple matches in a line never overlap: at each step
  53 along a line, the earliest-starting match among the regexes always wins,
  54 as the order regexes are given among the arguments never matters.
  55 
  56 The options are, available both in single and double-dash versions
  57 
  58     -h, -help      show this help message
  59     -f, -filter    filter out (ignore) lines with no matches
  60     -i, -ins       match regexes case-insensitively
  61 `
  62 
// highlightStyle is the ANSI escape sequence written right before each
// match to highlight it: it's the SGR code 7, which asks terminals to swap
// foreground and background colors (reverse video)
const highlightStyle = "\x1b[7m"
  64 
  65 func main() {
  66     filter := false
  67     buffered := false
  68     insensitive := false
  69     args := os.Args[1:]
  70 
  71     for len(args) > 0 {
  72         switch args[0] {
  73         case `-b`, `--b`, `-buffered`, `--buffered`:
  74             buffered = true
  75             args = args[1:]
  76             continue
  77 
  78         case `-f`, `--f`, `-filter`, `--filter`:
  79             filter = true
  80             args = args[1:]
  81             continue
  82 
  83         case `-fi`, `--fi`, `-if`, `--if`:
  84             filter = true
  85             insensitive = true
  86             args = args[1:]
  87             continue
  88 
  89         case `-h`, `--h`, `-help`, `--help`:
  90             os.Stdout.WriteString(info[1:])
  91             return
  92 
  93         case `-i`, `--i`, `-ins`, `--ins`:
  94             insensitive = true
  95             args = args[1:]
  96             continue
  97         }
  98 
  99         break
 100     }
 101 
 102     if len(args) > 0 && args[0] == `--` {
 103         args = args[1:]
 104     }
 105 
 106     patterns := make([]pattern, 0, len(args))
 107 
 108     for _, s := range args {
 109         var err error
 110         var pat pattern
 111 
 112         if insensitive {
 113             pat, err = compile(`(?i)` + s)
 114         } else {
 115             pat, err = compile(s)
 116         }
 117 
 118         if err != nil {
 119             os.Stderr.WriteString(err.Error())
 120             os.Stderr.WriteString("\n")
 121             continue
 122         }
 123 
 124         patterns = append(patterns, pat)
 125     }
 126 
 127     // quit right away when given invalid regexes
 128     if len(patterns) < len(args) {
 129         os.Exit(1)
 130         return
 131     }
 132 
 133     liveLines := !buffered
 134     if !buffered {
 135         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 136             liveLines = false
 137         }
 138     }
 139 
 140     err := run(os.Stdout, os.Stdin, patterns, filter, liveLines)
 141     if err != nil && err != io.EOF {
 142         os.Stderr.WriteString(err.Error())
 143         os.Stderr.WriteString("\n")
 144         os.Exit(1)
 145         return
 146     }
 147 }
 148 
 149 // pattern is a regular-expression pattern which distinguishes between the
 150 // start/end of a line and those of the chunks it can be used to match
 151 type pattern struct {
 152     // expr is the regular-expression
 153     expr *regexp.Regexp
 154 
 155     // begin is whether the regexp refers to the start of a line
 156     begin bool
 157 
 158     // end is whether the regexp refers to the end of a line
 159     end bool
 160 }
 161 
 162 func compile(src string) (pattern, error) {
 163     expr, err := regexp.Compile(src)
 164 
 165     var pat pattern
 166     pat.expr = expr
 167     pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
 168     pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
 169     return pat, err
 170 }
 171 
 172 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
 173     if i > 0 && p.begin {
 174         return -1, -1
 175     }
 176     if i != last && p.end {
 177         return -1, -1
 178     }
 179 
 180     span := p.expr.FindIndex(s)
 181     // also ignore empty regex matches to avoid infinite outer loops,
 182     // as skipping empty slices isn't advancing at all, leaving the
 183     // string stuck to being empty-matched forever by the same regex
 184     if len(span) != 2 || span[0] == span[1] {
 185         return -1, -1
 186     }
 187 
 188     return span[0], span[1]
 189 }
 190 
 191 func run(w io.Writer, r io.Reader, pats []pattern, filter, live bool) error {
 192     sc := bufio.NewScanner(r)
 193     sc.Buffer(nil, 8*1024*1024*1024)
 194     bw := bufio.NewWriter(w)
 195     defer bw.Flush()
 196 
 197     for i := 0; sc.Scan(); i++ {
 198         s := sc.Bytes()
 199         if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
 200             s = s[3:]
 201         }
 202 
 203         n := 0
 204         last := countChunks(s) - 1
 205         if last < 0 {
 206             last = 0
 207         }
 208 
 209         if filter && !matches(s, pats, last) {
 210             continue
 211         }
 212 
 213         for len(s) > 0 {
 214             i, j := indexEscapeSequence(s)
 215             if i < 0 {
 216                 handleChunk(bw, s, pats, n, last)
 217                 break
 218             }
 219             if j < 0 {
 220                 j = len(s)
 221             }
 222 
 223             handleChunk(bw, s[:i], pats, n, last)
 224             if i > 0 {
 225                 n++
 226             }
 227 
 228             bw.Write(s[i:j])
 229 
 230             s = s[j:]
 231         }
 232 
 233         if bw.WriteByte('\n') != nil {
 234             return io.EOF
 235         }
 236 
 237         if !live {
 238             continue
 239         }
 240 
 241         if bw.Flush() != nil {
 242             return io.EOF
 243         }
 244     }
 245 
 246     return sc.Err()
 247 }
 248 
 249 // matches finds out if any regex matches any substring around ANSI-sequences
 250 func matches(s []byte, patterns []pattern, last int) bool {
 251     n := 0
 252 
 253     for len(s) > 0 {
 254         i, j := indexEscapeSequence(s)
 255         if i < 0 {
 256             for _, p := range patterns {
 257                 if begin, _ := p.findIndex(s, n, last); begin >= 0 {
 258                     return true
 259                 }
 260             }
 261             return false
 262         }
 263 
 264         if j < 0 {
 265             j = len(s)
 266         }
 267 
 268         for _, p := range patterns {
 269             if begin, _ := p.findIndex(s[:i], n, last); begin >= 0 {
 270                 return true
 271             }
 272         }
 273 
 274         if i > 0 {
 275             n++
 276         }
 277 
 278         s = s[j:]
 279     }
 280 
 281     return false
 282 }
 283 
 284 func countChunks(s []byte) int {
 285     chunks := 0
 286 
 287     for len(s) > 0 {
 288         i, j := indexEscapeSequence(s)
 289         if i < 0 {
 290             break
 291         }
 292 
 293         if i > 0 {
 294             chunks++
 295         }
 296 
 297         if j < 0 {
 298             break
 299         }
 300         s = s[j:]
 301     }
 302 
 303     if len(s) > 0 {
 304         chunks++
 305     }
 306     return chunks
 307 }
 308 
 309 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 310 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 311 // indices which can be independently negative when either the start/end of
 312 // a sequence isn't found; given their fairly-common use, even the hyperlink
 313 // ESC]8 sequences are supported
 314 func indexEscapeSequence(s []byte) (int, int) {
 315     var prev byte
 316 
 317     for i, b := range s {
 318         if prev == '\x1b' && b == '[' {
 319             j := indexLetter(s[i+1:])
 320             if j < 0 {
 321                 return i, -1
 322             }
 323             return i - 1, i + 1 + j + 1
 324         }
 325 
 326         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 327             j := indexPair(s[i+1:], '\x1b', '\\')
 328             if j < 0 {
 329                 return i, -1
 330             }
 331             return i - 1, i + 1 + j + 2
 332         }
 333 
 334         prev = b
 335     }
 336 
 337     return -1, -1
 338 }
 339 
 340 func indexLetter(s []byte) int {
 341     for i, b := range s {
 342         upper := b &^ 32
 343         if 'A' <= upper && upper <= 'Z' {
 344             return i
 345         }
 346     }
 347 
 348     return -1
 349 }
 350 
 351 func indexPair(s []byte, x byte, y byte) int {
 352     var prev byte
 353 
 354     for i, b := range s {
 355         if prev == x && b == y && i > 0 {
 356             return i
 357         }
 358         prev = b
 359     }
 360 
 361     return -1
 362 }
 363 
// note: restoring ANSI-styles after style-resets, as a previous version
// used to do, doesn't seem to be worth it, judging by its results
 366 
 367 // handleChunk handles line-slices around any detected ANSI-style sequences,
 368 // or even whole lines, when no ANSI-styles are found in them
 369 func handleChunk(w *bufio.Writer, s []byte, with []pattern, n int, last int) {
 370     for len(s) > 0 {
 371         start, end := -1, -1
 372         for _, p := range with {
 373             i, j := p.findIndex(s, n, last)
 374             if i >= 0 && (i < start || start < 0) {
 375                 start, end = i, j
 376             }
 377         }
 378 
 379         if start < 0 {
 380             w.Write(s)
 381             return
 382         }
 383 
 384         w.Write(s[:start])
 385         w.WriteString(highlightStyle)
 386         w.Write(s[start:end])
 387         w.WriteString("\x1b[0m")
 388 
 389         s = s[end:]
 390     }
 391 }