File: hima.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for hima.
  27 
  28 To compile a smaller-sized command-line app, you can use the `go` command as
  29 follows:
  30 
  31 go build -ldflags "-s -w" -trimpath hima.go
  32 */
  33 
  34 package main
  35 
  36 import (
  37     "bufio"
  38     "bytes"
  39     "io"
  40     "os"
  41     "regexp"
  42 )
  43 
// info is the help message shown by the help options; it starts with a
// line-feed, which main skips when printing it
const info = `
hima [options...] [regexes...]


HIlight MAtches ANSI-styles matching regular expressions along lines read
from the standard input. The regular-expression mode used is "re2", which
is a superset of the commonly-used "extended-mode".

Regexes always avoid matching any ANSI-style sequences, to avoid messing
those up. Also, multiple matches in a line never overlap: at each step
along a line, the earliest-starting match among the regexes always wins,
as the order regexes are given among the arguments never matters.

The options are, available both in single and double-dash versions

    -h      show this help message
    -help   show this help message

    -i      match regexes case-insensitively
    -ins    match regexes case-insensitively
`
  65 
  66 func main() {
  67     nerr := 0
  68     insensitive := false
  69     args := os.Args[1:]
  70 
  71     if len(args) > 0 {
  72         switch args[0] {
  73         case `-h`, `--h`, `-help`, `--help`:
  74             os.Stdout.WriteString(info[1:])
  75             return
  76 
  77         case `-i`, `--i`, `-ins`, `--ins`:
  78             insensitive = true
  79             args = args[1:]
  80         }
  81     }
  82 
  83     if len(args) > 0 && args[0] == `--` {
  84         args = args[1:]
  85     }
  86 
  87     exprs := make([]*regexp.Regexp, 0, len(args))
  88 
  89     for _, s := range args {
  90         var err error
  91         var exp *regexp.Regexp
  92 
  93         if insensitive {
  94             exp, err = regexp.Compile(`(?i)` + s)
  95         } else {
  96             exp, err = regexp.Compile(s)
  97         }
  98 
  99         if err != nil {
 100             os.Stderr.WriteString(err.Error())
 101             os.Stderr.WriteString("\n")
 102             nerr++
 103         }
 104 
 105         exprs = append(exprs, exp)
 106     }
 107 
 108     if nerr > 0 {
 109         os.Exit(1)
 110     }
 111 
 112     sc := bufio.NewScanner(os.Stdin)
 113     sc.Buffer(nil, 8*1024*1024*1024)
 114     bw := bufio.NewWriter(os.Stdout)
 115 
 116     liveLines := true
 117     if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 118         liveLines = false
 119     }
 120 
 121     for sc.Scan() {
 122         for s := sc.Bytes(); len(s) > 0; {
 123             i, j := indexEscapeSequence(s)
 124             if i < 0 {
 125                 chunk(bw, s, exprs)
 126                 break
 127             }
 128 
 129             chunk(bw, s[:i], exprs)
 130             bw.Write(s[i:j])
 131 
 132             if j < 0 {
 133                 break
 134             }
 135             s = s[j:]
 136         }
 137 
 138         if err := bw.WriteByte('\n'); err != nil {
 139             return
 140         }
 141 
 142         if !liveLines {
 143             continue
 144         }
 145 
 146         if err := bw.Flush(); err != nil {
 147             return
 148         }
 149     }
 150 }
 151 
 152 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 153 // either the alert/bell byte, or the multi-byte sequences starting either
 154 // with ESC[ or ESC]; either returned index can be negative
 155 func indexEscapeSequence(s []byte) (int, int) {
 156     var prev byte
 157 
 158     for i, b := range s {
 159         if b == '\a' {
 160             return i, i + 1
 161         }
 162 
 163         if prev == '\x1b' && b == '[' {
 164             j := indexLetter(s[i+1:])
 165             if j < 0 {
 166                 return i, -1
 167             }
 168             return i - 1, i + 1 + j + 1
 169         }
 170 
 171         if prev == '\x1b' && b == ']' {
 172             j := bytes.IndexByte(s[i+1:], ':')
 173             if j < 0 {
 174                 return i, -1
 175             }
 176             return i - 1, i + 1 + j + 1
 177         }
 178 
 179         if prev == '\x1b' && b == '\\' {
 180             return i - 1, i + 1
 181         }
 182 
 183         prev = b
 184     }
 185 
 186     return -1, -1
 187 }
 188 
 189 func indexLetter(s []byte) int {
 190     for i, b := range s {
 191         if 'A' <= b && b <= 'Z' {
 192             return i
 193         }
 194         if 'a' <= b && b <= 'z' {
 195             return i
 196         }
 197     }
 198 
 199     return -1
 200 }
 201 
 202 // note: a previous version tried to restore ANSI-styles, but the final
 203 // results didn't seem to be worth it, even when done `successfully`
 204 
 205 // chunk handles line-slices around any detected ANSI-style sequences, or
 206 // even whole lines, when no ANSI-styles are found in them
 207 func chunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) {
 208     start := -1
 209     end := -1
 210 
 211     for len(s) > 0 {
 212         start = -1
 213         for _, e := range with {
 214             span := e.FindIndex(s)
 215             if span != nil && (span[0] < start || start < 0) {
 216                 start = span[0]
 217                 end = span[1]
 218             }
 219         }
 220 
 221         if start < 0 {
 222             w.Write(s)
 223             return
 224         }
 225 
 226         w.Write(s[:start])
 227         w.WriteString("\x1b[7m")
 228         w.Write(s[start:end])
 229         w.WriteString("\x1b[0m")
 230 
 231         s = s[end:]
 232     }
 233 }