File: coma.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath coma.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 coma [options...] [regexes/style pairs...]
  45 
  46 
  47 COlor MAtches ANSI-styles matching regular expressions along lines read
  48 from the standard input. The regular-expression mode used is "re2", which
  49 is a superset of the commonly-used "extended-mode".
  50 
  51 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  52 those up. Also, multiple matches in a line never overlap: at each step
  53 along a line, the earliest-starting match among the regexes always wins,
  54 as the order regexes are given among the arguments never matters.
  55 
  56 The options are, available both in single and double-dash versions
  57 
  58     -h, -help    show this help message
  59     -i, -ins     match regexes case-insensitively
  60 `
  61 
  62 var styleAliases = map[string]string{
  63     `b`: `blue`,
  64     `g`: `green`,
  65     `m`: `magenta`,
  66     `o`: `orange`,
  67     `p`: `purple`,
  68     `r`: `red`,
  69     `u`: `underline`,
  70 
  71     `bb`: `blueback`,
  72     `bg`: `greenback`,
  73     `bm`: `magentaback`,
  74     `bo`: `orangeback`,
  75     `bp`: `purpleback`,
  76     `br`: `redback`,
  77 
  78     `gb`: `greenback`,
  79     `mb`: `magentaback`,
  80     `ob`: `orangeback`,
  81     `pb`: `purpleback`,
  82     `rb`: `redback`,
  83 
  84     `hi`:  `inverse`,
  85     `inv`: `inverse`,
  86     `mag`: `magenta`,
  87 
  88     `du`: `doubleunderline`,
  89 
  90     `flip`: `inverse`,
  91     `swap`: `inverse`,
  92 
  93     `reset`:     `plain`,
  94     `highlight`: `inverse`,
  95     `hilite`:    `inverse`,
  96     `invert`:    `inverse`,
  97     `inverted`:  `inverse`,
  98     `swapped`:   `inverse`,
  99 
 100     `dunderline`:  `doubleunderline`,
 101     `dunderlined`: `doubleunderline`,
 102 
 103     `strikethrough`: `strike`,
 104     `strikethru`:    `strike`,
 105     `struck`:        `strike`,
 106 
 107     `underlined`: `underline`,
 108 
 109     `bblue`:    `blueback`,
 110     `bgray`:    `grayback`,
 111     `bgreen`:   `greenback`,
 112     `bmagenta`: `magentaback`,
 113     `borange`:  `orangeback`,
 114     `bpurple`:  `purpleback`,
 115     `bred`:     `redback`,
 116 
 117     `bgblue`:    `blueback`,
 118     `bggray`:    `grayback`,
 119     `bggreen`:   `greenback`,
 120     `bgmag`:     `magentaback`,
 121     `bgmagenta`: `magentaback`,
 122     `bgorange`:  `orangeback`,
 123     `bgpurple`:  `purpleback`,
 124     `bgred`:     `redback`,
 125 
 126     `bluebg`:    `blueback`,
 127     `graybg`:    `grayback`,
 128     `greenbg`:   `greenback`,
 129     `magbg`:     `magentaback`,
 130     `magentabg`: `magentaback`,
 131     `orangebg`:  `orangeback`,
 132     `purplebg`:  `purpleback`,
 133     `redbg`:     `redback`,
 134 
 135     `backblue`:    `blueback`,
 136     `backgray`:    `grayback`,
 137     `backgreen`:   `greenback`,
 138     `backmag`:     `magentaback`,
 139     `backmagenta`: `magentaback`,
 140     `backorange`:  `orangeback`,
 141     `backpurple`:  `purpleback`,
 142     `backred`:     `redback`,
 143 }
 144 
 145 var styles = map[string]string{
 146     `blue`:            "\x1b[38;2;0;95;215m",
 147     `bold`:            "\x1b[1m",
 148     `doubleunderline`: "\x1b[21m",
 149     `gray`:            "\x1b[38;2;168;168;168m",
 150     `green`:           "\x1b[38;2;0;135;95m",
 151     `inverse`:         "\x1b[7m",
 152     `magenta`:         "\x1b[38;2;215;0;255m",
 153     `orange`:          "\x1b[38;2;215;95;0m",
 154     `plain`:           "\x1b[0m",
 155     `purple`:          "\x1b[38;2;135;95;255m",
 156     `red`:             "\x1b[38;2;204;0;0m",
 157     `strike`:          "\x1b[9m",
 158     `underline`:       "\x1b[4m",
 159 
 160     `blueback`:    "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
 161     `grayback`:    "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
 162     `greenback`:   "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
 163     `magentaback`: "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
 164     `orangeback`:  "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
 165     `purpleback`:  "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
 166     `redback`:     "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
 167 }
 168 
 169 func main() {
 170     buffered := false
 171     insensitive := false
 172     args := os.Args[1:]
 173 
 174     for len(args) > 0 {
 175         switch args[0] {
 176         case `-b`, `--b`, `-buffered`, `--buffered`:
 177             buffered = true
 178             args = args[1:]
 179             continue
 180 
 181         case `-h`, `--h`, `-help`, `--help`:
 182             os.Stdout.WriteString(info[1:])
 183             return
 184 
 185         case `-i`, `--i`, `-ins`, `--ins`:
 186             insensitive = true
 187             args = args[1:]
 188             continue
 189         }
 190 
 191         break
 192     }
 193 
 194     if len(args) > 0 && args[0] == `--` {
 195         args = args[1:]
 196     }
 197 
 198     if len(args)%2 != 0 {
 199         const msg = "you forgot the style-name for/after the last regex\n"
 200         os.Stderr.WriteString(msg)
 201         os.Exit(1)
 202     }
 203 
 204     nerr := 0
 205     pairs := make([]pair, 0, len(args)/2)
 206 
 207     for len(args) >= 2 {
 208         src := args[0]
 209         sname := args[1]
 210 
 211         var err error
 212         var pat pattern
 213         if insensitive {
 214             pat, err = compile(`(?i)` + src)
 215         } else {
 216             pat, err = compile(src)
 217         }
 218         if err != nil {
 219             os.Stderr.WriteString(err.Error())
 220             os.Stderr.WriteString("\n")
 221             nerr++
 222         }
 223 
 224         if alias, ok := styleAliases[sname]; ok {
 225             sname = alias
 226         }
 227 
 228         style, ok := styles[sname]
 229         if !ok {
 230             os.Stderr.WriteString("no style named `")
 231             os.Stderr.WriteString(args[1])
 232             os.Stderr.WriteString("`\n")
 233             nerr++
 234         }
 235 
 236         pairs = append(pairs, pair{style: style, pat: pat})
 237         args = args[2:]
 238     }
 239 
 240     if nerr > 0 {
 241         os.Exit(1)
 242     }
 243 
 244     liveLines := !buffered
 245     if !buffered {
 246         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 247             liveLines = false
 248         }
 249     }
 250 
 251     sc := bufio.NewScanner(os.Stdin)
 252     sc.Buffer(nil, 8*1024*1024*1024)
 253     bw := bufio.NewWriter(os.Stdout)
 254     defer bw.Flush()
 255 
 256     for i := 0; sc.Scan(); i++ {
 257         s := sc.Bytes()
 258         if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
 259             s = s[3:]
 260         }
 261 
 262         handleLine(bw, s, pairs)
 263         if bw.WriteByte('\n') != nil {
 264             return
 265         }
 266 
 267         if !liveLines {
 268             continue
 269         }
 270 
 271         if bw.Flush() != nil {
 272             return
 273         }
 274     }
 275 }
 276 
 277 // pattern is a regular-expression pattern which distinguishes between the
 278 // start/end of a line and those of the chunks it can be used to match
 279 type pattern struct {
 280     // expr is the regular-expression
 281     expr *regexp.Regexp
 282 
 283     // begin is whether the regexp refers to the start of a line
 284     begin bool
 285 
 286     // end is whether the regexp refers to the end of a line
 287     end bool
 288 }
 289 
 290 func compile(src string) (pattern, error) {
 291     expr, err := regexp.Compile(src)
 292 
 293     var pat pattern
 294     pat.expr = expr
 295     pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
 296     pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
 297     return pat, err
 298 }
 299 
 300 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
 301     if i > 0 && p.begin {
 302         return -1, -1
 303     }
 304     if i != last && p.end {
 305         return -1, -1
 306     }
 307 
 308     span := p.expr.FindIndex(s)
 309     // also ignore empty regex matches to avoid infinite outer loops,
 310     // as skipping empty slices isn't advancing at all, leaving the
 311     // string stuck to being empty-matched forever by the same regex
 312     if len(span) != 2 || span[0] == span[1] {
 313         return -1, -1
 314     }
 315 
 316     return span[0], span[1]
 317 }
 318 
 319 // pair is a regex/style pair
 320 type pair struct {
 321     // style is the ANSI-sequence to style regex matches
 322     style string
 323 
 324     // pat is the regex pattern
 325     pat pattern
 326 }
 327 
 328 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 329 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 330 // indices which can be independently negative when either the start/end of
 331 // a sequence isn't found; given their fairly-common use, even the hyperlink
 332 // ESC]8 sequences are supported
 333 func indexEscapeSequence(s []byte) (int, int) {
 334     var prev byte
 335 
 336     for i, b := range s {
 337         if prev == '\x1b' && b == '[' {
 338             j := indexLetter(s[i+1:])
 339             if j < 0 {
 340                 return i, -1
 341             }
 342             return i - 1, i + 1 + j + 1
 343         }
 344 
 345         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 346             j := indexPair(s[i+1:], '\x1b', '\\')
 347             if j < 0 {
 348                 return i, -1
 349             }
 350             return i - 1, i + 1 + j + 2
 351         }
 352 
 353         prev = b
 354     }
 355 
 356     return -1, -1
 357 }
 358 
 359 func indexLetter(s []byte) int {
 360     for i, b := range s {
 361         upper := b &^ 32
 362         if 'A' <= upper && upper <= 'Z' {
 363             return i
 364         }
 365     }
 366 
 367     return -1
 368 }
 369 
 370 func indexPair(s []byte, x byte, y byte) int {
 371     var prev byte
 372 
 373     for i, b := range s {
 374         if prev == x && b == y && i > 0 {
 375             return i
 376         }
 377         prev = b
 378     }
 379 
 380     return -1
 381 }
 382 
 383 func handleLine(w *bufio.Writer, s []byte, with []pair) {
 384     n := 0
 385     last := countChunks(s) - 1
 386     if last < 0 {
 387         last = 0
 388     }
 389 
 390     for len(s) > 0 {
 391         i, j := indexEscapeSequence(s)
 392         if i < 0 {
 393             handleLineChunk(w, s, with, n, last)
 394             return
 395         }
 396 
 397         handleLineChunk(w, s[:i], with, n, last)
 398         if i > 0 {
 399             n++
 400         }
 401 
 402         w.Write(s[i:j])
 403 
 404         if j < 0 {
 405             break
 406         }
 407         s = s[j:]
 408     }
 409 }
 410 
 411 func countChunks(s []byte) int {
 412     chunks := 0
 413 
 414     for len(s) > 0 {
 415         i, j := indexEscapeSequence(s)
 416         if i < 0 {
 417             break
 418         }
 419 
 420         if i > 0 {
 421             chunks++
 422         }
 423 
 424         if j < 0 {
 425             break
 426         }
 427         s = s[j:]
 428     }
 429 
 430     if len(s) > 0 {
 431         chunks++
 432     }
 433     return chunks
 434 }
 435 
 436 func handleLineChunk(w *bufio.Writer, s []byte, with []pair, n int, last int) {
 437     for len(s) > 0 {
 438         which, start, end := -1, -1, -1
 439         for i, pair := range with {
 440             j, k := pair.pat.findIndex(s, n, last)
 441             if j >= 0 && (j < start || start < 0) {
 442                 which, start, end = i, j, k
 443             }
 444         }
 445 
 446         if start < 0 {
 447             w.Write(s)
 448             return
 449         }
 450 
 451         w.Write(s[:start])
 452         w.WriteString(with[which].style)
 453         w.Write(s[start:end])
 454         w.WriteString("\x1b[0m")
 455         s = s[end:]
 456     }
 457 }