File: coma.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath coma.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "io"
  37     "os"
  38     "regexp"
  39 )
  40 
  41 const info = `
  42 coma [options...] [regexes/style pairs...]
  43 
  44 
  45 COlor MAtches ANSI-styles matching regular expressions along lines read
  46 from the standard input. The regular-expression mode used is "re2", which
  47 is a superset of the commonly-used "extended-mode".
  48 
  49 Regexes always avoid matching any ANSI-style sequences, to avoid messing
  50 those up. Also, multiple matches in a line never overlap: at each step
  51 along a line, the earliest-starting match among the regexes always wins,
  52 as the order regexes are given among the arguments never matters.
  53 
  54 The options are, available both in single and double-dash versions
  55 
  56     -h      show this help message
  57     -help   show this help message
  58 
  59     -i      match regexes case-insensitively
  60     -ins    match regexes case-insensitively
  61 `
  62 
// styleAliases maps alternative style names to the canonical names used as
// keys in the styles table: names are looked up here first, and the result
// (if any) is then looked up among the styles
var styleAliases = map[string]string{
    // single-letter shortcuts
    `b`: `blue`,
    `g`: `green`,
    `m`: `magenta`,
    `o`: `orange`,
    `p`: `purple`,
    `r`: `red`,
    `u`: `underline`,

    // two-letter shortcuts for background styles, where the leading `b`
    // stands for `back`: note `bg` means a green background in particular
    `bb`: `blueback`,
    `bg`: `greenback`,
    `bm`: `magentaback`,
    `bo`: `orangeback`,
    `bp`: `purpleback`,
    `br`: `redback`,

    // two-letter shortcuts for background styles, with a trailing `b`
    `gb`: `greenback`,
    `mb`: `magentaback`,
    `ob`: `orangeback`,
    `pb`: `purpleback`,
    `rb`: `redback`,

    `hi`:  `inverse`,
    `inv`: `inverse`,
    `mag`: `magenta`,

    `du`: `doubleunderline`,

    `flip`: `inverse`,
    `swap`: `inverse`,

    // longer synonyms for the non-color styles
    `reset`:     `plain`,
    `highlight`: `inverse`,
    `hilite`:    `inverse`,
    `invert`:    `inverse`,
    `inverted`:  `inverse`,
    `swapped`:   `inverse`,

    `dunderline`:  `doubleunderline`,
    `dunderlined`: `doubleunderline`,

    `strikethrough`: `strike`,
    `strikethru`:    `strike`,
    `struck`:        `strike`,

    `underlined`: `underline`,

    // background styles, using a `b` prefix
    `bblue`:    `blueback`,
    `bgray`:    `grayback`,
    `bgreen`:   `greenback`,
    `bmagenta`: `magentaback`,
    `borange`:  `orangeback`,
    `bpurple`:  `purpleback`,
    `bred`:     `redback`,

    // background styles, using a `bg` prefix
    `bgblue`:    `blueback`,
    `bggray`:    `grayback`,
    `bggreen`:   `greenback`,
    `bgmag`:     `magentaback`,
    `bgmagenta`: `magentaback`,
    `bgorange`:  `orangeback`,
    `bgpurple`:  `purpleback`,
    `bgred`:     `redback`,

    // background styles, using a `bg` suffix
    `bluebg`:    `blueback`,
    `graybg`:    `grayback`,
    `greenbg`:   `greenback`,
    `magbg`:     `magentaback`,
    `magentabg`: `magentaback`,
    `orangebg`:  `orangeback`,
    `purplebg`:  `purpleback`,
    `redbg`:     `redback`,

    // background styles, using a `back` prefix
    `backblue`:    `blueback`,
    `backgray`:    `grayback`,
    `backgreen`:   `greenback`,
    `backmag`:     `magentaback`,
    `backmagenta`: `magentaback`,
    `backorange`:  `orangeback`,
    `backpurple`:  `purpleback`,
    `backred`:     `redback`,
}
 145 
// styles maps canonical style names to the ANSI escape sequences which start
// them: these sequences are emitted right before each styled match
var styles = map[string]string{
    // text attributes and 24-bit (`38;2;r;g;b`) foreground colors
    `blue`:            "\x1b[38;2;0;95;215m",
    `bold`:            "\x1b[1m",
    `doubleunderline`: "\x1b[21m",
    `gray`:            "\x1b[38;2;168;168;168m",
    `green`:           "\x1b[38;2;0;135;95m",
    `inverse`:         "\x1b[7m",
    `magenta`:         "\x1b[38;2;215;0;255m",
    `orange`:          "\x1b[38;2;215;95;0m",
    `plain`:           "\x1b[0m",
    `purple`:          "\x1b[38;2;135;95;255m",
    `red`:             "\x1b[38;2;204;0;0m",
    `strike`:          "\x1b[9m",
    `underline`:       "\x1b[4m",

    // background styles pair a 24-bit (`48;2;r;g;b`) background color with
    // the same light (238, 238, 238) foreground color
    `blueback`:    "\x1b[48;2;0;95;215m\x1b[38;2;238;238;238m",
    `grayback`:    "\x1b[48;2;168;168;168m\x1b[38;2;238;238;238m",
    `greenback`:   "\x1b[48;2;0;135;95m\x1b[38;2;238;238;238m",
    `magentaback`: "\x1b[48;2;215;0;255m\x1b[38;2;238;238;238m",
    `orangeback`:  "\x1b[48;2;215;95;0m\x1b[38;2;238;238;238m",
    `purpleback`:  "\x1b[48;2;135;95;255m\x1b[38;2;238;238;238m",
    `redback`:     "\x1b[48;2;204;0;0m\x1b[38;2;238;238;238m",
}
 169 
// patternStylePair ties a compiled regular expression (expr) to the ANSI
// escape sequence (style) emitted right before each of its matches
type patternStylePair struct {
    expr  *regexp.Regexp
    style string
}
 174 
 175 func main() {
 176     buffered := false
 177     insensitive := false
 178     args := os.Args[1:]
 179 
 180 out:
 181     for len(args) > 0 {
 182         switch args[0] {
 183         case `-b`, `--b`, `-buffered`, `--buffered`:
 184             buffered = true
 185             args = args[1:]
 186             continue
 187 
 188         case `-h`, `--h`, `-help`, `--help`:
 189             os.Stdout.WriteString(info[1:])
 190             return
 191 
 192         case `-i`, `--i`, `-ins`, `--ins`:
 193             insensitive = true
 194             args = args[1:]
 195             continue
 196 
 197         default:
 198             break out
 199         }
 200     }
 201 
 202     if len(args) > 0 && args[0] == `--` {
 203         args = args[1:]
 204     }
 205 
 206     if len(args)%2 != 0 {
 207         const msg = "you forgot the style-name for/after the last regex\n"
 208         os.Stderr.WriteString(msg)
 209         os.Exit(1)
 210     }
 211 
 212     nerr := 0
 213     pairs := make([]patternStylePair, 0, len(args)/2)
 214 
 215     for len(args) >= 2 {
 216         src := args[0]
 217         sname := args[1]
 218 
 219         var err error
 220         var exp *regexp.Regexp
 221         if insensitive {
 222             exp, err = regexp.Compile(`(?i)` + src)
 223         } else {
 224             exp, err = regexp.Compile(src)
 225         }
 226         if err != nil {
 227             os.Stderr.WriteString(err.Error())
 228             os.Stderr.WriteString("\n")
 229             nerr++
 230         }
 231 
 232         if alias, ok := styleAliases[sname]; ok {
 233             sname = alias
 234         }
 235 
 236         style, ok := styles[sname]
 237         if !ok {
 238             os.Stderr.WriteString("no style named `")
 239             os.Stderr.WriteString(args[1])
 240             os.Stderr.WriteString("`\n")
 241             nerr++
 242         }
 243 
 244         pairs = append(pairs, patternStylePair{expr: exp, style: style})
 245         args = args[2:]
 246     }
 247 
 248     if nerr > 0 {
 249         os.Exit(1)
 250     }
 251 
 252     liveLines := !buffered
 253     if !buffered {
 254         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
 255             liveLines = false
 256         }
 257     }
 258 
 259     sc := bufio.NewScanner(os.Stdin)
 260     sc.Buffer(nil, 8*1024*1024*1024)
 261     bw := bufio.NewWriter(os.Stdout)
 262     defer bw.Flush()
 263 
 264     for i := 0; sc.Scan(); i++ {
 265         s := sc.Bytes()
 266         if i == 0 && len(s) > 2 && s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf {
 267             s = s[3:]
 268         }
 269 
 270         handleLine(bw, s, pairs)
 271         if err := bw.WriteByte('\n'); err != nil {
 272             return
 273         }
 274 
 275         if !liveLines {
 276             continue
 277         }
 278 
 279         if err := bw.Flush(); err != nil {
 280             return
 281         }
 282     }
 283 }
 284 
 285 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 286 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 287 // indices which can be independently negative when either the start/end of
 288 // a sequence isn't found; given their fairly-common use, even the hyperlink
 289 // ESC]8 sequences are supported
 290 func indexEscapeSequence(s []byte) (int, int) {
 291     var prev byte
 292 
 293     for i, b := range s {
 294         if prev == '\x1b' && b == '[' {
 295             j := indexLetter(s[i+1:])
 296             if j < 0 {
 297                 return i, -1
 298             }
 299             return i - 1, i + 1 + j + 1
 300         }
 301 
 302         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 303             j := indexPair(s[i+1:], '\x1b', '\\')
 304             if j < 0 {
 305                 return i, -1
 306             }
 307             return i - 1, i + 1 + j + 2
 308         }
 309 
 310         prev = b
 311     }
 312 
 313     return -1, -1
 314 }
 315 
 316 func indexLetter(s []byte) int {
 317     for i, b := range s {
 318         upper := b &^ 32
 319         if 'A' <= upper && upper <= 'Z' {
 320             return i
 321         }
 322     }
 323 
 324     return -1
 325 }
 326 
 327 func indexPair(s []byte, x byte, y byte) int {
 328     var prev byte
 329 
 330     for i, b := range s {
 331         if prev == x && b == y {
 332             return i
 333         }
 334         prev = b
 335     }
 336 
 337     return -1
 338 }
 339 
 340 func handleLine(w *bufio.Writer, s []byte, with []patternStylePair) {
 341     for len(s) > 0 {
 342         i, j := indexEscapeSequence(s)
 343         if i < 0 {
 344             handleLineChunk(w, s, with)
 345             return
 346         }
 347 
 348         handleLineChunk(w, s[:i], with)
 349         w.Write(s[i:j])
 350 
 351         if j < 0 {
 352             break
 353         }
 354         s = s[j:]
 355     }
 356 }
 357 
 358 func handleLineChunk(w *bufio.Writer, s []byte, with []patternStylePair) {
 359     start := -1
 360     end := -1
 361     which := -1
 362 
 363     for len(s) > 0 {
 364         start = -1
 365         for i, pair := range with {
 366             span := pair.expr.FindIndex(s)
 367             // also ignore empty regex matches to avoid infinite outer loops,
 368             // as skipping empty slices isn't advancing at all, leaving the
 369             // string stuck to being empty-matched forever by the same regex
 370             if span == nil || span[0] == span[1] {
 371                 continue
 372             }
 373 
 374             if span[0] < start || start < 0 {
 375                 start = span[0]
 376                 end = span[1]
 377                 which = i
 378             }
 379         }
 380 
 381         if start < 0 {
 382             w.Write(s)
 383             return
 384         }
 385 
 386         w.Write(s[:start])
 387         w.WriteString(with[which].style)
 388         w.Write(s[start:end])
 389         w.WriteString("\x1b[0m")
 390         s = s[end:]
 391     }
 392 }