File: ncol.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath ncol.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "errors"
  37     "io"
  38     "os"
  39     "strconv"
  40     "strings"
  41     "unicode/utf8"
  42 )
  43 
// info is the help message for this app: func main shows it, minus its
// leading line-feed, when the first command-line argument is a help option
const info = `
ncol [options...] [filenames...]

Nice COLumns realigns and styles data tables using ANSI color sequences. In
particular, all auto-detected numbers are styled so they're easier to read
at a glance. Input tables can be either lines of space-separated values or
tab-separated values, and are auto-detected using the first non-empty line.

When not given filepaths to read data from, this tool reads from standard
input by default.

The options are, available both in single and double-dash versions

    -h      show this help message
    -help   show this help message

    -no-sums      avoid showing a final row with column sums
    -unsummed     avoid showing a final row with column sums

    -no-tiles     avoid showing color-coded tiles at the start of lines
    -untiled      avoid showing color-coded tiles at the start of lines
`
  66 
// columnGap is how many spaces separate adjacent columns in the output
const columnGap = 2

// altDigitStyle is the ANSI style (a 24-bit gray foreground color) used on
// alternating 3-digit groups, to make 4+ digit-runs easier to read
const altDigitStyle = "\x1b[38;2;168;168;168m"
  71 
  72 func main() {
  73     sums := true
  74     tiles := true
  75     args := os.Args[1:]
  76 
  77     if len(args) > 0 {
  78         switch args[0] {
  79         case `-h`, `--h`, `-help`, `--help`:
  80             os.Stderr.WriteString(info[1:])
  81             return
  82         }
  83     }
  84 
  85     for len(args) > 0 {
  86         switch args[0] {
  87         case
  88             `-no-sums`, `--no-sums`, `-no-totals`, `--no-totals`,
  89             `-unsummed`, `--unsummed`, `-untotaled`, `--untotaled`,
  90             `-untotalled`, `--untotalled`:
  91             sums = false
  92             args = args[1:]
  93             continue
  94 
  95         case `-no-tiles`, `--no-tiles`, `-untiled`, `--untiled`:
  96             tiles = false
  97             args = args[1:]
  98             continue
  99 
 100         case `--`:
 101             args = args[1:]
 102         }
 103 
 104         break
 105     }
 106 
 107     var res table
 108     res.ShowTiles = tiles
 109     res.ShowSums = sums
 110 
 111     if err := run(args, &res); err != nil {
 112         os.Stderr.WriteString(err.Error())
 113         os.Stderr.WriteString("\n")
 114         os.Exit(1)
 115     }
 116 }
 117 
// table has all summary info gathered from the data, along with the rows
// themselves, stored as lines/strings
type table struct {
    // Columns is the item-count of the first line found to have any items:
    // all lines are later split into at most this many items
    Columns int

    // Rows has all non-empty input lines, in the order they were read
    Rows []string

    // MaxWidth has the widest item-width found for each column; func realign
    // may widen these further, to fit the items from the sums row
    MaxWidth []int

    // MaxDotDecimals has, for each column, the max count of decimal digits
    // among its numeric items, the dot included when there are any decimals
    MaxDotDecimals []int

    // Numeric counts how many numeric items each column has
    Numeric []int

    // Sums has the running total of the numeric items for each column
    Sums []float64

    // LoopItems splits a line into items, calling the func given for each
    // one, and returns how many items it handled; func handleReader picks
    // either loopItemsTSV or loopItemsSSV, using the first non-empty line
    LoopItems func(line string, items int, t *table, f itemFunc) int

    // sb is a scratch buffer, where func isNumeric leaves the plain-text
    // version of the numeric item it last checked, ready for parsing
    sb strings.Builder

    // ShowTiles enables the color-coded tiles at the start of each line
    ShowTiles bool

    // ShowSums enables the final row with all the column sums
    ShowSums bool
}
 141 
// itemFunc is the type of the callbacks given to the LoopItems funcs: these
// get the 0-based index of the item, the item itself, and the table
type itemFunc func(i int, s string, t *table)
 143 
 144 func run(paths []string, res *table) error {
 145     for _, p := range paths {
 146         if err := handleFile(res, p); err != nil {
 147             return err
 148         }
 149     }
 150 
 151     if len(paths) == 0 {
 152         if err := handleReader(res, os.Stdin); err != nil {
 153             return err
 154         }
 155     }
 156 
 157     bw := bufio.NewWriterSize(os.Stdout, 32*1024)
 158     defer bw.Flush()
 159     realign(bw, res)
 160     return nil
 161 }
 162 
 163 func handleFile(res *table, path string) error {
 164     f, err := os.Open(path)
 165     if err != nil {
 166         // on windows, file-not-found error messages may mention `CreateFile`,
 167         // even when trying to open files in read-only mode
 168         return errors.New(`can't open file named ` + path)
 169     }
 170     defer f.Close()
 171     return handleReader(res, f)
 172 }
 173 
 174 func handleReader(t *table, r io.Reader) error {
 175     const gb = 1024 * 1024 * 1024
 176     sc := bufio.NewScanner(r)
 177     sc.Buffer(nil, 8*gb)
 178 
 179     for sc.Scan() {
 180         line := sc.Text()
 181         if len(line) == 0 {
 182             continue
 183         }
 184 
 185         t.Rows = append(t.Rows, line)
 186 
 187         if t.Columns == 0 {
 188             if t.LoopItems == nil {
 189                 if strings.IndexByte(line, '\t') >= 0 {
 190                     t.LoopItems = loopItemsTSV
 191                 } else {
 192                     t.LoopItems = loopItemsSSV
 193                 }
 194             }
 195 
 196             const maxInt = int(^uint(0) >> 1)
 197             t.Columns = t.LoopItems(line, maxInt, t, doNothing)
 198         }
 199 
 200         t.LoopItems(line, t.Columns, t, updateItem)
 201     }
 202 
 203     return sc.Err()
 204 }
 205 
 206 // doNothing is given to LoopItems to count items, while doing nothing else
 207 func doNothing(i int, s string, t *table) {
 208 }
 209 
 210 func updateItem(i int, s string, t *table) {
 211     // ensure column-info-slices have enough room
 212     if i >= len(t.MaxWidth) {
 213         t.MaxWidth = append(t.MaxWidth, 0)
 214         t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
 215         t.Numeric = append(t.Numeric, 0)
 216         t.Sums = append(t.Sums, 0)
 217     }
 218 
 219     // keep track of widest rune-counts for each column
 220     w := countWidth(s)
 221     if t.MaxWidth[i] < w {
 222         t.MaxWidth[i] = w
 223     }
 224 
 225     // update stats for numeric items
 226     if isNumeric(s, &(t.sb)) {
 227         dd := countDotDecimals(s)
 228         if t.MaxDotDecimals[i] < dd {
 229             t.MaxDotDecimals[i] = dd
 230         }
 231 
 232         t.Numeric[i]++
 233         f, _ := strconv.ParseFloat(t.sb.String(), 64)
 234         t.Sums[i] += f
 235     }
 236 }
 237 
 238 // loopItemsSSV loops over a line's items, allocation-free style; when given
 239 // empty strings, the callback func is never called
 240 func loopItemsSSV(s string, max int, t *table, f itemFunc) int {
 241     i := 0
 242     s = trimTrailingSpaces(s)
 243 
 244     for {
 245         s = trimLeadingSpaces(s)
 246         if len(s) == 0 {
 247             return i
 248         }
 249 
 250         if i+1 == max {
 251             f(i, s, t)
 252             return i + 1
 253         }
 254 
 255         j := strings.IndexByte(s, ' ')
 256         if j < 0 {
 257             f(i, s, t)
 258             return i + 1
 259         }
 260 
 261         f(i, s[:j], t)
 262         s = s[j+1:]
 263         i++
 264     }
 265 
 266     return i + 1
 267 }
 268 
 269 func trimLeadingSpaces(s string) string {
 270     for len(s) > 0 && s[0] == ' ' {
 271         s = s[1:]
 272     }
 273     return s
 274 }
 275 
 276 func trimTrailingSpaces(s string) string {
 277     for len(s) > 0 && s[len(s)-1] == ' ' {
 278         s = s[:len(s)-1]
 279     }
 280     return s
 281 }
 282 
 283 // loopItemsTSV loops over a line's tab-separated items, allocation-free style;
 284 // when given empty strings, the callback func is never called
 285 func loopItemsTSV(s string, max int, t *table, f itemFunc) int {
 286     if len(s) == 0 {
 287         return 0
 288     }
 289 
 290     i := 0
 291 
 292     for {
 293         if i+1 == max {
 294             f(i, s, t)
 295             return i + 1
 296         }
 297 
 298         j := strings.IndexByte(s, '\t')
 299         if j < 0 {
 300             f(i, s, t)
 301             return i + 1
 302         }
 303 
 304         f(i, s[:j], t)
 305         s = s[j+1:]
 306         i++
 307     }
 308 }
 309 
 310 func skipLeadingEscapeSequences(s string) string {
 311     for len(s) >= 2 {
 312         if s[0] != '\x1b' {
 313             return s
 314         }
 315 
 316         switch s[1] {
 317         case '[':
 318             s = skipSingleLeadingANSI(s[2:])
 319 
 320         case ']':
 321             if len(s) < 3 || s[2] != '8' {
 322                 return s
 323             }
 324             s = skipSingleLeadingOSC(s[3:])
 325 
 326         default:
 327             return s
 328         }
 329     }
 330 
 331     return s
 332 }
 333 
 334 func skipSingleLeadingANSI(s string) string {
 335     for len(s) > 0 {
 336         upper := s[0] &^ 32
 337         s = s[1:]
 338         if 'A' <= upper && upper <= 'Z' {
 339             break
 340         }
 341     }
 342 
 343     return s
 344 }
 345 
 346 func skipSingleLeadingOSC(s string) string {
 347     var prev byte
 348 
 349     for len(s) > 0 {
 350         b := s[0]
 351         s = s[1:]
 352         if prev == '\x1b' && b == '\\' {
 353             break
 354         }
 355         prev = b
 356     }
 357 
 358     return s
 359 }
 360 
 361 // isNumeric checks if a string is valid/useable as a number
 362 func isNumeric(s string, sb *strings.Builder) bool {
 363     if len(s) == 0 {
 364         return false
 365     }
 366 
 367     sb.Reset()
 368 
 369     s = skipLeadingEscapeSequences(s)
 370     if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
 371         sb.WriteByte(s[0])
 372         s = s[1:]
 373     }
 374 
 375     s = skipLeadingEscapeSequences(s)
 376     if len(s) == 0 {
 377         return false
 378     }
 379     if b := s[0]; b == '.' {
 380         sb.WriteByte(b)
 381         return isDigits(s[1:], sb)
 382     }
 383 
 384     digits := 0
 385 
 386     for {
 387         s = skipLeadingEscapeSequences(s)
 388         if len(s) == 0 {
 389             break
 390         }
 391 
 392         b := s[0]
 393         sb.WriteByte(b)
 394 
 395         if b == '.' {
 396             return isDigits(s[1:], sb)
 397         }
 398 
 399         if !('0' <= b && b <= '9') {
 400             return false
 401         }
 402 
 403         digits++
 404         s = s[1:]
 405     }
 406 
 407     s = skipLeadingEscapeSequences(s)
 408     return len(s) == 0 && digits > 0
 409 }
 410 
 411 func isDigits(s string, sb *strings.Builder) bool {
 412     if len(s) == 0 {
 413         return false
 414     }
 415 
 416     digits := 0
 417 
 418     for {
 419         s = skipLeadingEscapeSequences(s)
 420         if len(s) == 0 {
 421             break
 422         }
 423 
 424         if b := s[0]; '0' <= b && b <= '9' {
 425             sb.WriteByte(b)
 426             s = s[1:]
 427             digits++
 428         } else {
 429             return false
 430         }
 431     }
 432 
 433     s = skipLeadingEscapeSequences(s)
 434     return len(s) == 0 && digits > 0
 435 }
 436 
 437 // countDecimals counts decimal digits from the string given, assuming it
 438 // represents a valid/useable float64, when parsed
 439 func countDecimals(s string) int {
 440     dot := strings.IndexByte(s, '.')
 441     if dot < 0 {
 442         return 0
 443     }
 444 
 445     decs := 0
 446     s = s[dot+1:]
 447 
 448     for len(s) > 0 {
 449         s = skipLeadingEscapeSequences(s)
 450         if len(s) == 0 {
 451             break
 452         }
 453         if '0' <= s[0] && s[0] <= '9' {
 454             decs++
 455         }
 456         s = s[1:]
 457     }
 458 
 459     return decs
 460 }
 461 
 462 // countDotDecimals is like func countDecimals, but this one also includes
 463 // the dot, when any decimals are present, else the count stays at 0
 464 func countDotDecimals(s string) int {
 465     decs := countDecimals(s)
 466     if decs > 0 {
 467         return decs + 1
 468     }
 469     return decs
 470 }
 471 
 472 func countWidth(s string) int {
 473     width := 0
 474 
 475     for len(s) > 0 {
 476         i := indexStartANSI(s)
 477         if i < 0 {
 478             width += utf8.RuneCountInString(s)
 479             return width
 480         }
 481 
 482         width += utf8.RuneCountInString(s[:i])
 483 
 484         for len(s) > 0 {
 485             upper := s[0] &^ 32
 486             s = s[1:]
 487             if 'A' <= upper && upper <= 'Z' {
 488                 break
 489             }
 490         }
 491     }
 492 
 493     return width
 494 }
 495 
 496 func indexStartANSI(s string) int {
 497     var prev byte
 498 
 499     for i := range s {
 500         b := s[i]
 501         if prev == '\x1b' && b == '[' {
 502             return i - 1
 503         }
 504         prev = b
 505     }
 506 
 507     return -1
 508 }
 509 
 510 func realign(w *bufio.Writer, t *table) {
 511     // make sums row first, as final alignments are usually affected by these
 512     var sums []string
 513     if t.ShowSums {
 514         sums = make([]string, 0, t.Columns)
 515     }
 516 
 517     for i := 0; i < t.Columns && t.ShowSums; i++ {
 518         s := `-`
 519         width := 1
 520 
 521         if t.Numeric[i] > 0 {
 522             decs := t.MaxDotDecimals[i]
 523             if decs > 0 {
 524                 decs--
 525             }
 526 
 527             var buf [64]byte
 528             s = string(strconv.AppendFloat(buf[:0], t.Sums[i], 'f', decs, 64))
 529             width = len(s)
 530         }
 531 
 532         if t.MaxWidth[i] < width {
 533             t.MaxWidth[i] = width
 534         }
 535         sums = append(sums, s)
 536     }
 537 
 538     due := 0
 539     showItem := func(i int, s string, t *table) {
 540         if i > 0 {
 541             due += columnGap
 542         }
 543 
 544         if isNumeric(s, &(t.sb)) {
 545             dd := countDotDecimals(s)
 546             rpad := t.MaxDotDecimals[i] - dd
 547             width := countWidth(s)
 548             lpad := t.MaxWidth[i] - (width + rpad) + due
 549             writeSpaces(w, lpad)
 550             f, _ := strconv.ParseFloat(t.sb.String(), 64)
 551             writeNumericItem(w, s, numericStyle(f))
 552             due = rpad
 553             return
 554         }
 555 
 556         writeSpaces(w, due)
 557         w.WriteString(s)
 558         due = t.MaxWidth[i] - countWidth(s)
 559     }
 560 
 561     writeTile := func(i int, s string, t *table) {
 562         if len(s) == 0 {
 563             w.WriteString("\x1b[0m○")
 564             return
 565         }
 566 
 567         if isNumeric(s, &(t.sb)) {
 568             f, _ := strconv.ParseFloat(t.sb.String(), 64)
 569             w.WriteString(numericStyle(f))
 570             w.WriteString("")
 571             return
 572         }
 573 
 574         if s[0] == ' ' || s[len(s)-1] == ' ' {
 575             w.WriteString("\x1b[38;2;196;160;0m■")
 576             return
 577         }
 578 
 579         w.WriteString("\x1b[38;2;128;128;128m■")
 580     }
 581 
 582     // show realigned rows
 583 
 584     for _, line := range t.Rows {
 585         due = 0
 586         if t.ShowTiles {
 587             end := t.LoopItems(line, t.Columns, t, writeTile)
 588             if end < len(t.MaxWidth)-1 {
 589                 w.WriteString("\x1b[0m")
 590             }
 591             for i := end + 1; i < len(t.MaxWidth); i++ {
 592                 w.WriteString("×")
 593             }
 594             w.WriteString("\x1b[0m")
 595             due += columnGap
 596         }
 597 
 598         t.LoopItems(line, t.Columns, t, showItem)
 599         if w.WriteByte('\n') != nil {
 600             return
 601         }
 602     }
 603 
 604     if t.Columns > 0 && t.ShowSums {
 605         realignSums(w, t, sums)
 606     }
 607 }
 608 
 609 func realignSums(w *bufio.Writer, t *table, sums []string) {
 610     due := 0
 611     if t.ShowTiles {
 612         due += t.Columns + columnGap
 613     }
 614 
 615     for i, s := range sums {
 616         if i > 0 {
 617             due += columnGap
 618         }
 619 
 620         if t.Numeric[i] == 0 {
 621             writeSpaces(w, due)
 622             w.WriteString(s)
 623             due = t.MaxWidth[i] - countWidth(s)
 624             continue
 625         }
 626 
 627         lpad := t.MaxWidth[i] - len(s) + due
 628         writeSpaces(w, lpad)
 629         writeNumericItem(w, s, numericStyle(t.Sums[i]))
 630         due = 0
 631     }
 632 
 633     w.WriteByte('\n')
 634 }
 635 
 636 // writeSpaces does what it says, minimizing calls to write-like funcs
 637 func writeSpaces(w *bufio.Writer, n int) {
 638     const spaces = `                                `
 639     if n < 1 {
 640         return
 641     }
 642 
 643     for n >= len(spaces) {
 644         w.WriteString(spaces)
 645         n -= len(spaces)
 646     }
 647     w.WriteString(spaces[:n])
 648 }
 649 
 650 func writeRowTiles(w *bufio.Writer, s string, t *table, writeTile itemFunc) {
 651     end := t.LoopItems(s, t.Columns, t, writeTile)
 652 
 653     if end < len(t.MaxWidth)-1 {
 654         w.WriteString("\x1b[0m")
 655     }
 656     for i := end + 1; i < len(t.MaxWidth); i++ {
 657         w.WriteString("×")
 658     }
 659     w.WriteString("\x1b[0m")
 660 }
 661 
 662 func numericStyle(f float64) string {
 663     if f > 0 {
 664         if float64(int64(f)) == f {
 665             return "\x1b[38;2;0;135;0m"
 666         }
 667         return "\x1b[38;2;0;155;95m"
 668     }
 669     if f < 0 {
 670         if float64(int64(f)) == f {
 671             return "\x1b[38;2;204;0;0m"
 672         }
 673         return "\x1b[38;2;215;95;95m"
 674     }
 675     if f == 0 {
 676         return "\x1b[38;2;0;95;215m"
 677     }
 678     return "\x1b[38;2;128;128;128m"
 679 }
 680 
 681 func writeNumericItem(w *bufio.Writer, s string, startStyle string) {
 682     w.WriteString(startStyle)
 683     if len(s) > 0 && (s[0] == '-' || s[0] == '+') {
 684         w.WriteByte(s[0])
 685         s = s[1:]
 686     }
 687 
 688     dot := strings.IndexByte(s, '.')
 689     if dot < 0 {
 690         restyleDigits(w, s, altDigitStyle)
 691         w.WriteString("\x1b[0m")
 692         return
 693     }
 694 
 695     if len(s[:dot]) > 3 {
 696         restyleDigits(w, s[:dot], altDigitStyle)
 697         w.WriteString("\x1b[0m")
 698         w.WriteString(startStyle)
 699         w.WriteByte('.')
 700     } else {
 701         w.WriteString(s[:dot])
 702         w.WriteByte('.')
 703     }
 704 
 705     rest := s[dot+1:]
 706     restyleDigits(w, rest, altDigitStyle)
 707     if len(rest) < 4 {
 708         w.WriteString("\x1b[0m")
 709     }
 710 }
 711 
 712 // restyleDigits renders a run of digits as alternating styled/unstyled runs
 713 // of 3 digits, which greatly improves readability, and is the only purpose
 714 // of this app; string is assumed to be all decimal digits
 715 func restyleDigits(w *bufio.Writer, digits string, altStyle string) {
 716     if len(digits) < 4 {
 717         // digit sequence is short, so emit it as is
 718         w.WriteString(digits)
 719         return
 720     }
 721 
 722     // separate leading 0..2 digits which don't align with the 3-digit groups
 723     i := len(digits) % 3
 724     // emit leading digits unstyled, if there are any
 725     w.WriteString(digits[:i])
 726     // the rest is guaranteed to have a length which is a multiple of 3
 727     digits = digits[i:]
 728 
 729     // start by styling, unless there were no leading digits
 730     style := i != 0
 731 
 732     for len(digits) > 0 {
 733         if style {
 734             w.WriteString(altStyle)
 735             w.WriteString(digits[:3])
 736             w.WriteString("\x1b[0m")
 737         } else {
 738             w.WriteString(digits[:3])
 739         }
 740 
 741         // advance to the next triple: the start of this func is supposed
 742         // to guarantee this step always works
 743         digits = digits[3:]
 744 
 745         // alternate between styled and unstyled 3-digit groups
 746         style = !style
 747     }
 748 }