File: coma.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath coma.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "io"
  38     "os"
  39     "regexp"
  40     "strings"
  41 )
  42 
  43 const info = `
  44 coma [options...] [regexes/style pairs...]
  45 
  46 
  47 COlor MAtches ANSI-styles matching regular expressions along lines read
  48 from the standard input. The regular-expression mode used is "re2", which
  49 is a superset of the commonly-used "extended-mode".
  50 
  51 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  52 those up. Also, multiple matches in a line never overlap: at each step
  53 along a line, the earliest-starting match among the regexes always wins,
  54 as the order regexes are given among the arguments never matters.
  55 
  56 The options are, available both in single and double-dash versions
  57 
  58     -h, -help    show this help message
  59     -i, -ins     match regexes case-insensitively
  60 `
  61 
  62 var styleAliases = map[string]string{
  63     `b`: `blue`,
  64     `g`: `green`,
  65     `m`: `magenta`,
  66     `o`: `orange`,
  67     `p`: `purple`,
  68     `r`: `red`,
  69     `u`: `underline`,
  70 
  71     `bb`: `blueback`,
  72     `bg`: `greenback`,
  73     `bm`: `magentaback`,
  74     `bo`: `orangeback`,
  75     `bp`: `purpleback`,
  76     `br`: `redback`,
  77 
  78     `gb`: `greenback`,
  79     `mb`: `magentaback`,
  80     `ob`: `orangeback`,
  81     `pb`: `purpleback`,
  82     `rb`: `redback`,
  83 
  84     `hi`:  `inverse`,
  85     `inv`: `inverse`,
  86     `mag`: `magenta`,
  87 
  88     `du`: `doubleunderline`,
  89 
  90     `flip`: `inverse`,
  91     `swap`: `inverse`,
  92 
  93     `reset`:     `plain`,
  94     `highlight`: `inverse`,
  95     `hilite`:    `inverse`,
  96     `invert`:    `inverse`,
  97     `inverted`:  `inverse`,
  98     `swapped`:   `inverse`,
  99 
 100     `dunderline`:  `doubleunderline`,
 101     `dunderlined`: `doubleunderline`,
 102 
 103     `strikethrough`: `strike`,
 104     `strikethru`:    `strike`,
 105     `struck`:        `strike`,
 106 
 107     `underlined`: `underline`,
 108 
 109     `bblue`:    `blueback`,
 110     `bgray`:    `grayback`,
 111     `bgreen`:   `greenback`,
 112     `bmagenta`: `magentaback`,
 113     `borange`:  `orangeback`,
 114     `bpurple`:  `purpleback`,
 115     `bred`:     `redback`,
 116 
 117     `bgblue`:    `blueback`,
 118     `bggray`:    `grayback`,
 119     `bggreen`:   `greenback`,
 120     `bgmag`:     `magentaback`,
 121     `bgmagenta`: `magentaback`,
 122     `bgorange`:  `orangeback`,
 123     `bgpurple`:  `purpleback`,
 124     `bgred`:     `redback`,
 125 
 126     `bluebg`:    `blueback`,
 127     `graybg`:    `grayback`,
 128     `greenbg`:   `greenback`,
 129     `magbg`:     `magentaback`,
 130     `magentabg`: `magentaback`,
 131     `orangebg`:  `orangeback`,
 132     `purplebg`:  `purpleback`,
 133     `redbg`:     `redback`,
 134 
 135     `backblue`:    `blueback`,
 136     `backgray`:    `grayback`,
 137     `backgreen`:   `greenback`,
 138     `backmag`:     `magentaback`,
 139     `backmagenta`: `magentaback`,
 140     `backorange`:  `orangeback`,
 141     `backpurple`:  `purpleback`,
 142     `backred`:     `redback`,
 143 }
 144 
 145 var styles = map[string]string{
 146     `blue`:            "\x1b[38;2;0;95;215m",
 147     `bold`:            "\x1b[1m",
 148     `doubleunderline`: "\x1b[21m",
 149     `gray`:            "\x1b[38;2;168;168;168m",
 150     `green`:           "\x1b[38;2;0;135;95m",
 151     `inverse`:         "\x1b[7m",
 152     `magenta`:         "\x1b[38;2;215;0;255m",
 153     `orange`:          "\x1b[38;2;215;95;0m",
 154     `plain`:           "\x1b[0m",
 155     `purple`:          "\x1b[38;2;135;95;255m",
 156     `red`:             "\x1b[38;2;204;0;0m",
 157     `strike`:          "\x1b[9m",
 158     `underline`:       "\x1b[4m",
 159 
 160     `blueback`:    "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
 161     `grayback`:    "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
 162     `greenback`:   "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
 163     `magentaback`: "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
 164     `orangeback`:  "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
 165     `purpleback`:  "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
 166     `redback`:     "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
 167 }
 168 
 169 func main() {
 170     buffered := false
 171     insensitive := false
 172     args := os.Args[1:]
 173 
 174     for len(args) > 0 {
 175         switch args[0] {
 176         case `-b`, `--b`, `-buffered`, `--buffered`:
 177             buffered = true
 178             args = args[1:]
 179             continue
 180 
 181         case `-h`, `--h`, `-help`, `--help`:
 182             os.Stdout.WriteString(info[1:])
 183             return
 184 
 185         case `-i`, `--i`, `-ins`, `--ins`:
 186             insensitive = true
 187             args = args[1:]
 188             continue
 189         }
 190 
 191         break
 192     }
 193 
 194     if len(args) > 0 && args[0] == `--` {
 195         args = args[1:]
 196     }
 197 
 198     if len(args)%2 != 0 {
 199         const msg = "you forgot the style-name for/after the last regex\n"
 200         os.Stderr.WriteString(msg)
 201         os.Exit(1)
 202         return
 203     }
 204 
 205     nerr := 0
 206     pairs := make([]pair, 0, len(args)/2)
 207 
 208     for len(args) >= 2 {
 209         src := args[0]
 210         sname := args[1]
 211 
 212         var err error
 213         var pat pattern
 214         if insensitive {
 215             pat, err = compile(`(?i)` + src)
 216         } else {
 217             pat, err = compile(src)
 218         }
 219         if err != nil {
 220             os.Stderr.WriteString(err.Error())
 221             os.Stderr.WriteString("\n")
 222             nerr++
 223         }
 224 
 225         if alias, ok := styleAliases[sname]; ok {
 226             sname = alias
 227         }
 228 
 229         style, ok := styles[sname]
 230         if !ok {
 231             os.Stderr.WriteString("no style named `")
 232             os.Stderr.WriteString(args[1])
 233             os.Stderr.WriteString("`\n")
 234             nerr++
 235         }
 236 
 237         pairs = append(pairs, pair{style: style, pat: pat})
 238         args = args[2:]
 239     }
 240 
 241     if nerr > 0 {
 242         os.Exit(1)
 243         return
 244     }
 245 
 246     liveLines := !buffered
 247     if !buffered {
 248         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 249             liveLines = false
 250         }
 251     }
 252 
 253     sc := bufio.NewScanner(os.Stdin)
 254     sc.Buffer(nil, 8*1024*1024*1024)
 255     bw := bufio.NewWriter(os.Stdout)
 256     defer bw.Flush()
 257 
 258     for i := 0; sc.Scan(); i++ {
 259         s := sc.Bytes()
 260         if i == 0 && bytes.HasPrefix(s, []byte{0xef, 0xbb, 0xbf}) {
 261             s = s[3:]
 262         }
 263 
 264         handleLine(bw, s, pairs)
 265         if bw.WriteByte('\n') != nil {
 266             return
 267         }
 268 
 269         if !liveLines {
 270             continue
 271         }
 272 
 273         if bw.Flush() != nil {
 274             return
 275         }
 276     }
 277 }
 278 
 279 // pattern is a regular-expression pattern which distinguishes between the
 280 // start/end of a line and those of the chunks it can be used to match
 281 type pattern struct {
 282     // expr is the regular-expression
 283     expr *regexp.Regexp
 284 
 285     // begin is whether the regexp refers to the start of a line
 286     begin bool
 287 
 288     // end is whether the regexp refers to the end of a line
 289     end bool
 290 }
 291 
 292 func compile(src string) (pattern, error) {
 293     expr, err := regexp.Compile(src)
 294 
 295     var pat pattern
 296     pat.expr = expr
 297     pat.begin = strings.HasPrefix(src, `^`) || strings.HasPrefix(src, `(?i)^`)
 298     pat.end = strings.HasSuffix(src, `$`) && !strings.HasSuffix(src, `\$`)
 299     return pat, err
 300 }
 301 
 302 func (p pattern) findIndex(s []byte, i int, last int) (start int, stop int) {
 303     if i > 0 && p.begin {
 304         return -1, -1
 305     }
 306     if i != last && p.end {
 307         return -1, -1
 308     }
 309 
 310     span := p.expr.FindIndex(s)
 311     // also ignore empty regex matches to avoid infinite outer loops,
 312     // as skipping empty slices isn't advancing at all, leaving the
 313     // string stuck to being empty-matched forever by the same regex
 314     if len(span) != 2 || span[0] == span[1] {
 315         return -1, -1
 316     }
 317 
 318     return span[0], span[1]
 319 }
 320 
 321 // pair is a regex/style pair
 322 type pair struct {
 323     // style is the ANSI-sequence to style regex matches
 324     style string
 325 
 326     // pat is the regex pattern
 327     pat pattern
 328 }
 329 
 330 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 331 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 332 // indices which can be independently negative when either the start/end of
 333 // a sequence isn't found; given their fairly-common use, even the hyperlink
 334 // ESC]8 sequences are supported
 335 func indexEscapeSequence(s []byte) (int, int) {
 336     var prev byte
 337 
 338     for i, b := range s {
 339         if prev == '\x1b' && b == '[' {
 340             j := indexLetter(s[i+1:])
 341             if j < 0 {
 342                 return i, -1
 343             }
 344             return i - 1, i + 1 + j + 1
 345         }
 346 
 347         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 348             j := indexPair(s[i+1:], '\x1b', '\\')
 349             if j < 0 {
 350                 return i, -1
 351             }
 352             return i - 1, i + 1 + j + 2
 353         }
 354 
 355         prev = b
 356     }
 357 
 358     return -1, -1
 359 }
 360 
 361 func indexLetter(s []byte) int {
 362     for i, b := range s {
 363         upper := b &^ 32
 364         if 'A' <= upper && upper <= 'Z' {
 365             return i
 366         }
 367     }
 368 
 369     return -1
 370 }
 371 
 372 func indexPair(s []byte, x byte, y byte) int {
 373     var prev byte
 374 
 375     for i, b := range s {
 376         if prev == x && b == y && i > 0 {
 377             return i
 378         }
 379         prev = b
 380     }
 381 
 382     return -1
 383 }
 384 
 385 func handleLine(w *bufio.Writer, s []byte, with []pair) {
 386     n := 0
 387     last := countChunks(s) - 1
 388     if last < 0 {
 389         last = 0
 390     }
 391 
 392     for len(s) > 0 {
 393         i, j := indexEscapeSequence(s)
 394         if i < 0 {
 395             handleLineChunk(w, s, with, n, last)
 396             return
 397         }
 398 
 399         handleLineChunk(w, s[:i], with, n, last)
 400         if i > 0 {
 401             n++
 402         }
 403 
 404         w.Write(s[i:j])
 405 
 406         if j < 0 {
 407             break
 408         }
 409         s = s[j:]
 410     }
 411 }
 412 
 413 func countChunks(s []byte) int {
 414     chunks := 0
 415 
 416     for len(s) > 0 {
 417         i, j := indexEscapeSequence(s)
 418         if i < 0 {
 419             break
 420         }
 421 
 422         if i > 0 {
 423             chunks++
 424         }
 425 
 426         if j < 0 {
 427             break
 428         }
 429         s = s[j:]
 430     }
 431 
 432     if len(s) > 0 {
 433         chunks++
 434     }
 435     return chunks
 436 }
 437 
 438 func handleLineChunk(w *bufio.Writer, s []byte, with []pair, n int, last int) {
 439     for len(s) > 0 {
 440         which, start, end := -1, -1, -1
 441         for i, pair := range with {
 442             j, k := pair.pat.findIndex(s, n, last)
 443             if j >= 0 && (j < start || start < 0) {
 444                 which, start, end = i, j, k
 445             }
 446         }
 447 
 448         if start < 0 {
 449             w.Write(s)
 450             return
 451         }
 452 
 453         w.Write(s[:start])
 454         w.WriteString(with[which].style)
 455         w.Write(s[start:end])
 456         w.WriteString("\x1b[0m")
 457         s = s[end:]
 458     }
 459 }