File: hima.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath hima.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
  41 const info = `
  42 hima [options...] [regexes...]
  43 
  44 
  45 HIlight MAtches ANSI-styles matching regular expressions along lines read
  46 from the standard input. The regular-expression mode used is "re2", which
  47 is a superset of the commonly-used "extended-mode".
  48 
  49 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  50 those up. Also, multiple matches in a line never overlap: at each step
  51 along a line, the earliest-starting match among the regexes always wins,
  52 as the order regexes are given among the arguments never matters.
  53 
  54 The options are, available both in single and double-dash versions
  55 
  56     -h      show this help message
  57     -help   show this help message
  58 
  59     -i      match regexes case-insensitively
  60     -ins    match regexes case-insensitively
  61 `
  62 
// highlightStyle is the ANSI escape sequence written right before each
// match: ESC[7m, which is the SGR code for reverse (inverse) video
const highlightStyle = "\x1b[7m"
  64 
  65 func main() {
  66     buffered := false
  67     insensitive := false
  68     args := os.Args[1:]
  69 
  70     if len(args) > 0 {
  71         switch args[0] {
  72         case `-h`, `--h`, `-help`, `--help`:
  73             os.Stdout.WriteString(info[1:])
  74             return
  75         }
  76     }
  77 
  78 out:
  79     for len(args) > 0 {
  80         switch args[0] {
  81         case `-b`, `--b`, `-buffered`, `--buffered`:
  82             buffered = true
  83             args = args[1:]
  84 
  85         case `-i`, `--i`, `-ins`, `--ins`:
  86             insensitive = true
  87             args = args[1:]
  88 
  89         default:
  90             break out
  91         }
  92     }
  93 
  94     if len(args) > 0 && args[0] == `--` {
  95         args = args[1:]
  96     }
  97 
  98     exprs := make([]*regexp.Regexp, 0, len(args))
  99 
 100     for _, s := range args {
 101         var err error
 102         var exp *regexp.Regexp
 103 
 104         if insensitive {
 105             exp, err = regexp.Compile(`(?i)` + s)
 106         } else {
 107             exp, err = regexp.Compile(s)
 108         }
 109 
 110         if err != nil {
 111             os.Stderr.WriteString(err.Error())
 112             os.Stderr.WriteString("\n")
 113             continue
 114         }
 115 
 116         exprs = append(exprs, exp)
 117     }
 118 
 119     // quit right away when given invalid regexes
 120     if len(exprs) < len(args) {
 121         os.Exit(1)
 122     }
 123 
 124     liveLines := !buffered
 125     if !buffered {
 126         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 127             liveLines = false
 128         }
 129     }
 130 
 131     if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
 132         os.Stderr.WriteString(err.Error())
 133         os.Stderr.WriteString("\n")
 134         os.Exit(1)
 135     }
 136 }
 137 
 138 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 139     sc := bufio.NewScanner(r)
 140     sc.Buffer(nil, 8*1024*1024*1024)
 141     bw := bufio.NewWriter(w)
 142     defer bw.Flush()
 143 
 144     for i := 0; sc.Scan(); i++ {
 145         s := sc.Bytes()
 146         if i == 0 && len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf {
 147             s = s[3:]
 148         }
 149 
 150         for len(s) > 0 {
 151             i, j := indexEscapeSequence(s)
 152             if i < 0 {
 153                 handleChunk(bw, s, exprs)
 154                 break
 155             }
 156             if j < 0 {
 157                 j = len(s)
 158             }
 159 
 160             handleChunk(bw, s[:i], exprs)
 161             bw.Write(s[i:j])
 162 
 163             s = s[j:]
 164         }
 165 
 166         if err := bw.WriteByte('\n'); err != nil {
 167             return nil
 168         }
 169 
 170         if !live {
 171             continue
 172         }
 173 
 174         if err := bw.Flush(); err != nil {
 175             return nil
 176         }
 177     }
 178 
 179     return sc.Err()
 180 }
 181 
 182 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 183 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 184 // indices which can be independently negative when either the start/end of
 185 // a sequence isn't found; given their fairly-common use, even the hyperlink
 186 // ESC]8 sequences are supported
 187 func indexEscapeSequence(s []byte) (int, int) {
 188     var prev byte
 189 
 190     for i, b := range s {
 191         if prev == '\x1b' && b == '[' {
 192             j := indexLetter(s[i+1:])
 193             if j < 0 {
 194                 return i, -1
 195             }
 196             return i - 1, i + 1 + j + 1
 197         }
 198 
 199         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 200             j := indexPair(s[i+1:], '\x1b', '\\')
 201             if j < 0 {
 202                 return i, -1
 203             }
 204             return i - 1, i + 1 + j + 2
 205         }
 206 
 207         prev = b
 208     }
 209 
 210     return -1, -1
 211 }
 212 
 213 func indexLetter(s []byte) int {
 214     for i, b := range s {
 215         upper := b &^ 32
 216         if 'A' <= upper && upper <= 'Z' {
 217             return i
 218         }
 219     }
 220 
 221     return -1
 222 }
 223 
 224 func indexPair(s []byte, x byte, y byte) int {
 225     var prev byte
 226 
 227     for i, b := range s {
 228         if prev == x && b == y {
 229             return i
 230         }
 231         prev = b
 232     }
 233 
 234     return -1
 235 }
 236 
// note: judging from its results, restoring ANSI-styles after style-resets,
// as a previous version used to do, doesn't seem to be worth it
 239 
 240 // handleChunk handles line-slices around any detected ANSI-style sequences,
 241 // or even whole lines, when no ANSI-styles are found in them
 242 func handleChunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) {
 243     start := -1
 244     end := -1
 245 
 246     for len(s) > 0 {
 247         start = -1
 248         for _, e := range with {
 249             span := e.FindIndex(s)
 250             // also ignore empty regex matches to avoid infinite outer loops,
 251             // as skipping empty slices isn't advancing at all, leaving the
 252             // string stuck to being empty-matched forever by the same regex
 253             if span == nil || span[0] == span[1] {
 254                 continue
 255             }
 256 
 257             if span[0] < start || start < 0 {
 258                 start = span[0]
 259                 end = span[1]
 260             }
 261         }
 262 
 263         if start < 0 {
 264             w.Write(s)
 265             return
 266         }
 267 
 268         w.Write(s[:start])
 269         w.WriteString(highlightStyle)
 270         w.Write(s[start:end])
 271         w.WriteString("\x1b[0m")
 272 
 273         s = s[end:]
 274     }
 275 }