File: hima.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath hima.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "io" 37 "os" 38 "regexp" 39 ) 40 41 const info = ` 42 hima [options...] [regexes...] 43 44 45 HIlight MAtches ANSI-styles matching regular expressions along lines read 46 from the standard input. The regular-expression mode used is "re2", which 47 is a superset of the commonly-used "extended-mode". 48 49 Regexes always avoid matching any ANSI-style sequences, to avoid messing 50 those up. Also, multiple matches in a line never overlap: at each step 51 along a line, the earliest-starting match among the regexes always wins, 52 as the order regexes are given among the arguments never matters. 53 54 The options are, available both in single and double-dash versions 55 56 -h show this help message 57 -help show this help message 58 59 -i match regexes case-insensitively 60 -ins match regexes case-insensitively 61 ` 62 63 const highlightStyle = "\x1b[7m" 64 65 func main() { 66 buffered := false 67 insensitive := false 68 args := os.Args[1:] 69 70 if len(args) > 0 { 71 switch args[0] { 72 case `-h`, `--h`, `-help`, `--help`: 73 os.Stdout.WriteString(info[1:]) 74 return 75 } 76 } 77 78 out: 79 for len(args) > 0 { 80 switch args[0] { 81 case `-b`, `--b`, `-buffered`, `--buffered`: 82 buffered = true 83 args = args[1:] 84 85 case `-i`, `--i`, `-ins`, `--ins`: 86 insensitive = true 87 args = args[1:] 88 89 default: 90 break out 91 } 92 } 93 94 if len(args) > 0 && args[0] == `--` { 95 args = args[1:] 96 } 97 98 exprs := make([]*regexp.Regexp, 0, len(args)) 99 100 for _, s := range args { 101 var err error 102 var exp *regexp.Regexp 103 104 if insensitive { 105 exp, err = regexp.Compile(`(?i)` + s) 106 } else { 107 exp, err = regexp.Compile(s) 108 } 109 110 if err != nil { 111 os.Stderr.WriteString(err.Error()) 112 os.Stderr.WriteString("\n") 113 continue 114 } 115 116 exprs = append(exprs, exp) 117 } 118 119 // quit right away when given invalid regexes 120 if len(exprs) < len(args) { 121 os.Exit(1) 122 } 123 124 liveLines := !buffered 125 if !buffered { 126 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 127 liveLines = false 128 } 129 } 130 131 if err := run(os.Stdout, os.Stdin, exprs, liveLines); err != nil { 132 os.Stderr.WriteString(err.Error()) 133 os.Stderr.WriteString("\n") 134 os.Exit(1) 135 } 136 } 137 138 func run(w io.Writer, r io.Reader, exprs []*regexp.Regexp, live bool) error { 139 sc := bufio.NewScanner(r) 140 sc.Buffer(nil, 8*1024*1024*1024) 141 bw := bufio.NewWriter(w) 142 defer bw.Flush() 143 144 for i := 0; sc.Scan(); i++ { 145 s := sc.Bytes() 146 if i == 0 && len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf { 147 s = s[3:] 148 } 149 150 for len(s) > 0 { 151 i, j := indexEscapeSequence(s) 152 if i < 0 { 153 handleChunk(bw, s, exprs) 154 break 155 } 156 if j < 0 { 157 j = len(s) 158 } 159 160 handleChunk(bw, s[:i], exprs) 161 bw.Write(s[i:j]) 162 163 s = s[j:] 164 } 165 166 if err := bw.WriteByte('\n'); err != nil { 167 return nil 168 } 169 170 if !live { 171 continue 172 } 173 174 if err := bw.Flush(); err != nil { 175 return nil 176 } 177 } 178 179 return sc.Err() 180 } 181 182 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is 183 // the multi-byte sequences starting with ESC[; the result is a pair of slice 184 // indices which can be independently negative when either the start/end of 185 // a sequence isn't found; given their fairly-common use, even the hyperlink 186 // ESC]8 sequences are supported 187 func indexEscapeSequence(s []byte) (int, int) { 188 var prev byte 189 190 for i, b := range s { 191 if prev == '\x1b' && b == '[' { 192 j := indexLetter(s[i+1:]) 193 if j < 0 { 194 return i, -1 195 } 196 return i - 1, i + 1 + j + 1 197 } 198 199 if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' { 200 j := indexPair(s[i+1:], '\x1b', '\\') 201 if j < 0 { 202 return i, -1 203 } 204 return i - 1, i + 1 + j + 2 205 } 206 207 prev = b 208 } 209 210 return -1, -1 211 } 212 213 func indexLetter(s []byte) int { 214 for i, b := range s { 215 upper := b &^ 32 216 if 'A' <= upper && upper <= 'Z' { 217 return i 218 } 219 } 220 221 return -1 222 } 223 224 func indexPair(s []byte, x byte, y byte) int { 225 var prev byte 226 227 for i, b := range s { 228 if prev == x && b == y { 229 return i 230 } 231 prev = b 232 } 233 234 return -1 235 } 236 237 // note: looking at the results of restoring ANSI-styles after style-resets 238 // doesn't seem to be worth it, as a previous version used to do 239 240 // handleChunk handles line-slices around any detected ANSI-style sequences, 241 // or even whole lines, when no ANSI-styles are found in them 242 func handleChunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) { 243 start := -1 244 end := -1 245 246 for len(s) > 0 { 247 start = -1 248 for _, e := range with { 249 span := e.FindIndex(s) 250 // also ignore empty regex matches to avoid infinite outer loops, 251 // as skipping empty slices isn't advancing at all, leaving the 252 // string stuck to being empty-matched forever by the same regex 253 if span == nil || span[0] == span[1] { 254 continue 255 } 256 257 if span[0] < start || start < 0 { 258 start = span[0] 259 end = span[1] 260 } 261 } 262 263 if start < 0 { 264 w.Write(s) 265 return 266 } 267 268 w.Write(s[:start]) 269 w.WriteString(highlightStyle) 270 w.Write(s[start:end]) 271 w.WriteString("\x1b[0m") 272 273 s = s[end:] 274 } 275 }