File: hima.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath hima.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
// info is the help message, which main shows when the first command-line
// argument is any of the help options; the message starts with a line-feed
// so the text can start on its own line in the source, and main skips that
// first byte when showing it
const info = `
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is "re2", which
is a superset of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h      show this help message
    -help   show this help message

    -i      match regexes case-insensitively
    -ins    match regexes case-insensitively
`
  62 
// highlightStyle is the ANSI-style sequence handleChunk emits right before
// each match: ESC[7m is the SGR code for reverse video, which swaps the
// foreground and background colors
const highlightStyle = "\x1b[7m"
  64 
  65 func main() {
  66     buffered := false
  67     insensitive := false
  68     args := os.Args[1:]
  69 
  70     if len(args) > 0 {
  71         switch args[0] {
  72         case `-h`, `--h`, `-help`, `--help`:
  73             os.Stdout.WriteString(info[1:])
  74             return
  75         }
  76     }
  77 
  78     for len(args) > 0 {
  79         switch args[0] {
  80         case `-i`, `--i`, `-ins`, `--ins`:
  81             insensitive = true
  82             args = args[1:]
  83             continue
  84 
  85         case `-buffered`, `--buffered`:
  86             buffered = true
  87             args = args[1:]
  88             continue
  89         }
  90 
  91         break
  92     }
  93 
  94     if len(args) > 0 && args[0] == `--` {
  95         args = args[1:]
  96     }
  97 
  98     exprs := make([]*regexp.Regexp, 0, len(args))
  99 
 100     for _, s := range args {
 101         var err error
 102         var exp *regexp.Regexp
 103 
 104         if insensitive {
 105             exp, err = regexp.Compile(`(?i)` + s)
 106         } else {
 107             exp, err = regexp.Compile(s)
 108         }
 109 
 110         if err != nil {
 111             os.Stderr.WriteString(err.Error())
 112             os.Stderr.WriteString("\n")
 113             continue
 114         }
 115 
 116         exprs = append(exprs, exp)
 117     }
 118 
 119     // quit right away when given invalid regexes
 120     if len(exprs) < len(args) {
 121         os.Exit(1)
 122     }
 123 
 124     liveLines := true
 125     if !buffered {
 126         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 127             liveLines = false
 128         }
 129     }
 130 
 131     if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil {
 132         os.Stderr.WriteString(err.Error())
 133         os.Stderr.WriteString("\n")
 134         os.Exit(1)
 135     }
 136 }
 137 
 138 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error {
 139     sc := bufio.NewScanner(r)
 140     sc.Buffer(nil, 8*1024*1024*1024)
 141     bw := bufio.NewWriter(w)
 142     defer bw.Flush()
 143 
 144     for sc.Scan() {
 145         for s := sc.Bytes(); len(s) > 0; {
 146             i, j := indexEscapeSequence(s)
 147             if i < 0 {
 148                 handleChunk(bw, s, exprs)
 149                 break
 150             }
 151             if j < 0 {
 152                 j = len(s)
 153             }
 154 
 155             handleChunk(bw, s[:i], exprs)
 156             bw.Write(s[i:j])
 157 
 158             s = s[j:]
 159         }
 160 
 161         if err := bw.WriteByte('\n'); err != nil {
 162             return nil
 163         }
 164 
 165         if !live {
 166             continue
 167         }
 168 
 169         if err := bw.Flush(); err != nil {
 170             return nil
 171         }
 172     }
 173 
 174     return sc.Err()
 175 }
 176 
 177 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 178 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 179 // indices which can be independently negative when either the start/end of
 180 // a sequence isn't found; given their fairly-common use, even the hyperlink
 181 // ESC]8 sequences are supported
 182 func indexEscapeSequence(s []byte) (int, int) {
 183     var prev byte
 184 
 185     for i, b := range s {
 186         if prev == '\x1b' && b == '[' {
 187             j := indexLetter(s[i+1:])
 188             if j < 0 {
 189                 return i, -1
 190             }
 191             return i - 1, i + 1 + j + 1
 192         }
 193 
 194         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 195             j := indexPair(s[i+1:], '\x1b', '\\')
 196             if j < 0 {
 197                 return i, -1
 198             }
 199             return i - 1, i + 1 + j + 2
 200         }
 201 
 202         prev = b
 203     }
 204 
 205     return -1, -1
 206 }
 207 
 208 func indexLetter(s []byte) int {
 209     for i, b := range s {
 210         upper := b &^ 32
 211         if 'A' <= upper && upper <= 'Z' {
 212             return i
 213         }
 214     }
 215 
 216     return -1
 217 }
 218 
 219 func indexPair(s []byte, x byte, y byte) int {
 220     var prev byte
 221 
 222     for i, b := range s {
 223         if prev == x && b == y {
 224             return i
 225         }
 226         prev = b
 227     }
 228 
 229     return -1
 230 }
 231 
// note: restoring ANSI-styles after style-resets, as a previous version used
// to do, doesn't seem to be worth it, judging by the results
 234 
 235 // handleChunk handles line-slices around any detected ANSI-style sequences,
 236 // or even whole lines, when no ANSI-styles are found in them
 237 func handleChunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) {
 238     start := -1
 239     end := -1
 240 
 241     for len(s) > 0 {
 242         start = -1
 243         for _, e := range with {
 244             span := e.FindIndex(s)
 245             if span != nil && (span[0] < start || start < 0) {
 246                 start = span[0]
 247                 end = span[1]
 248             }
 249         }
 250 
 251         if start < 0 {
 252             w.Write(s)
 253             return
 254         }
 255 
 256         w.Write(s[:start])
 257         w.WriteString(highlightStyle)
 258         w.Write(s[start:end])
 259         w.WriteString("\x1b[0m")
 260 
 261         s = s[end:]
 262     }
 263 }