File: hima.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for hima. 27 28 To compile a smaller-sized command-line app, you can use the `go` command as 29 follows: 30 31 go build -ldflags "-s -w" -trimpath hima.go 32 */ 33 34 package main 35 36 import ( 37 "bufio" 38 "bytes" 39 "io" 40 "os" 41 "regexp" 42 ) 43 44 const info = ` 45 hima [options...] [regexes...] 46 47 48 HIlight MAtches ANSI-styles matching regular expressions along lines read 49 from the standard input. The regular-expression mode used is "re2", which 50 is a superset of the commonly-used "extended-mode". 51 52 Regexes always avoid matching any ANSI-style sequences, to avoid messing 53 those up. 
Also, multiple matches in a line never overlap: at each step 54 along a line, the earliest-starting match among the regexes always wins, 55 as the order regexes are given among the arguments never matters. 56 57 The options are, available both in single and double-dash versions 58 59 -h show this help message 60 -help show this help message 61 62 -i match regexes case-insensitively 63 -ins match regexes case-insensitively 64 ` 65 66 func main() { 67 nerr := 0 68 insensitive := false 69 args := os.Args[1:] 70 71 if len(args) > 0 { 72 switch args[0] { 73 case `-h`, `--h`, `-help`, `--help`: 74 os.Stdout.WriteString(info[1:]) 75 return 76 77 case `-i`, `--i`, `-ins`, `--ins`: 78 insensitive = true 79 args = args[1:] 80 } 81 } 82 83 if len(args) > 0 && args[0] == `--` { 84 args = args[1:] 85 } 86 87 exprs := make([]*regexp.Regexp, 0, len(args)) 88 89 for _, s := range args { 90 var err error 91 var exp *regexp.Regexp 92 93 if insensitive { 94 exp, err = regexp.Compile(`(?i)` + s) 95 } else { 96 exp, err = regexp.Compile(s) 97 } 98 99 if err != nil { 100 os.Stderr.WriteString(err.Error()) 101 os.Stderr.WriteString("\n") 102 nerr++ 103 } 104 105 exprs = append(exprs, exp) 106 } 107 108 if nerr > 0 { 109 os.Exit(1) 110 } 111 112 sc := bufio.NewScanner(os.Stdin) 113 sc.Buffer(nil, 8*1024*1024*1024) 114 bw := bufio.NewWriter(os.Stdout) 115 116 liveLines := true 117 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 118 liveLines = false 119 } 120 121 for sc.Scan() { 122 for s := sc.Bytes(); len(s) > 0; { 123 i, j := indexEscapeSequence(s) 124 if i < 0 { 125 chunk(bw, s, exprs) 126 break 127 } 128 129 chunk(bw, s[:i], exprs) 130 bw.Write(s[i:j]) 131 132 if j < 0 { 133 break 134 } 135 s = s[j:] 136 } 137 138 if err := bw.WriteByte('\n'); err != nil { 139 return 140 } 141 142 if !liveLines { 143 continue 144 } 145 146 if err := bw.Flush(); err != nil { 147 return 148 } 149 } 150 } 151 152 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is 153 
// either the alert/bell byte, or the multi-byte sequences starting either
// with ESC[ or ESC]; the 2-byte sequence ESC\ is detected as well
//
// The results are the index where the sequence starts, and the index right
// after its end. Both are negative when no sequence is found. When a
// sequence starts but never ends in the slice given, the first result still
// points at its ESC byte, while the second one is negative.
//
// Sequences starting with ESC[ end at their first ASCII letter, while those
// starting with ESC] end either with the alert/bell byte, or with ESC\.
func indexEscapeSequence(s []byte) (int, int) {
	const esc = '\x1b'

	for i, b := range s {
		if b == '\a' {
			return i, i + 1
		}

		// all other sequences need an ESC byte right before this one
		if i == 0 || s[i-1] != esc {
			continue
		}

		// from here on, index i-1 is where the sequence starts
		switch b {
		case '[':
			j := indexLetter(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j + 1

		case ']':
			j := indexOSCEnd(s[i+1:])
			if j < 0 {
				return i - 1, -1
			}
			return i - 1, i + 1 + j

		case '\\':
			return i - 1, i + 1
		}
	}

	return -1, -1
}

// indexOSCEnd finds where the payload of an ESC] sequence stops, returning
// the index right after its terminator, which is either the alert/bell byte
// or the 2-byte sequence ESC\, whichever comes first; the result is negative
// when neither terminator is found
func indexOSCEnd(s []byte) int {
	bel := bytes.IndexByte(s, '\a')
	st := bytes.Index(s, []byte("\x1b\\"))

	switch {
	case bel < 0 && st < 0:
		return -1
	case st < 0 || (bel >= 0 && bel < st):
		return bel + 1
	default:
		return st + 2
	}
}

// indexLetter finds the index of the first ASCII letter in the slice given,
// returning a negative value when there are none
func indexLetter(s []byte) int {
	for i, b := range s {
		switch {
		case 'A' <= b && b <= 'Z', 'a' <= b && b <= 'z':
			return i
		}
	}

	return -1
}

// note: a previous version tried to restore ANSI-styles, but the final
// results didn't seem to be worth it, even when done `successfully`

// chunk handles line-slices around any detected ANSI-style sequences, or
// even whole lines, when no ANSI-styles are found in them
//
// Matches are emitted between the reverse-video and reset ANSI-styles, and
// everything else is emitted as is. At each step, the earliest-starting
// match among all regexes wins: when matches start at the same place, the
// longest one wins, so the order of the regexes doesn't affect the result.
// Empty matches are never styled.
//
// Write errors aren't checked here: the buffered writer keeps its first
// error, which later writes/flushes keep returning.
func chunk(w *bufio.Writer, s []byte, with []*regexp.Regexp) {
	for len(s) > 0 {
		start, end := -1, -1

		for _, e := range with {
			span := e.FindIndex(s)
			if span == nil {
				continue
			}

			switch {
			case start < 0, span[0] < start:
				start, end = span[0], span[1]
			case span[0] == start && span[1] > end:
				end = span[1]
			}
		}

		if start < 0 {
			w.Write(s)
			return
		}

		if start == end {
			// an empty match consumes nothing, so looping on it would
			// never end: emit the text up to and including the symbol
			// at the match, then look for matches again after it
			if start >= len(s) {
				w.Write(s)
				return
			}

			// skip UTF-8 continuation bytes, so multi-byte symbols
			// are never split apart
			n := start + 1
			for n < len(s) && s[n]&0xC0 == 0x80 {
				n++
			}

			w.Write(s[:n])
			s = s[n:]
			continue
		}

		w.Write(s[:start])
		w.WriteString("\x1b[7m")
		w.Write(s[start:end])
		w.WriteString("\x1b[0m")

		s = s[end:]
	}
}