File: ncol.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath ncol.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "errors"
  37     "io"
  38     "os"
  39     "strconv"
  40     "strings"
  41     "unicode/utf8"
  42 )
  43 
// info is the help message shown by the help options; its leading newline
// only keeps the source tidy, and is skipped by slicing when main shows it
const info = `
ncol [options...] [filenames...]

Nice COLumns realigns and styles data tables using ANSI color sequences. In
particular, all auto-detected numbers are styled so they're easier to read
at a glance. Input tables can be either lines of space-separated values or
tab-separated values, and are auto-detected using the first non-empty line.

When not given filepaths to read data from, this tool reads from standard
input by default.

The only option is the help option, using any of "-h", "--h", "-help", or
"--help".
`
  58 
// altDigitStyle is the ANSI sequence (a gray 24-bit foreground color) which
// func writeNumericItem hands to func restyleDigits, to style every other
// 3-digit group, making runs of 4+ digits easier to read
const altDigitStyle = "\x1b[38;2;168;168;168m"
  61 
  62 func main() {
  63     if len(os.Args) > 1 {
  64         switch os.Args[1] {
  65         case `-h`, `--h`, `-help`, `--help`:
  66             os.Stderr.WriteString(info[1:])
  67             return
  68         }
  69     }
  70 
  71     args := os.Args[1:]
  72     if len(args) > 0 && args[0] == `--` {
  73         args = args[1:]
  74     }
  75 
  76     if err := run(args); err != nil {
  77         os.Stderr.WriteString(err.Error())
  78         os.Stderr.WriteString("\n")
  79         os.Exit(1)
  80     }
  81 }
  82 
  83 func run(paths []string) error {
  84     var res table
  85 
  86     for _, p := range paths {
  87         if err := handleFile(&res, p); err != nil {
  88             return err
  89         }
  90     }
  91 
  92     if len(paths) == 0 {
  93         if err := handleReader(&res, os.Stdin); err != nil {
  94             return err
  95         }
  96     }
  97 
  98     bw := bufio.NewWriter(os.Stdout)
  99     defer bw.Flush()
 100     realign(bw, res)
 101     return nil
 102 }
 103 
 104 func handleFile(res *table, path string) error {
 105     f, err := os.Open(path)
 106     if err != nil {
 107         // on windows, file-not-found error messages may mention `CreateFile`,
 108         // even when trying to open files in read-only mode
 109         return errors.New(`can't open file named ` + path)
 110     }
 111     defer f.Close()
 112     return handleReader(res, f)
 113 }
 114 
 115 func handleReader(res *table, r io.Reader) error {
 116     const gb = 1024 * 1024 * 1024
 117     sc := bufio.NewScanner(r)
 118     sc.Buffer(nil, 8*gb)
 119 
 120     for sc.Scan() {
 121         res.update(sc.Text())
 122     }
 123     return sc.Err()
 124 }
 125 
 126 // loopItemsSSV loops over a line's items, allocation-free style; when given
 127 // empty strings, the callback func is never called
 128 func loopItemsSSV(s string, max int, f func(i int, s string)) {
 129     for len(s) > 0 && s[len(s)-1] == ' ' {
 130         s = s[:len(s)-1]
 131     }
 132 
 133     for i := 0; true; i++ {
 134         for len(s) > 0 && s[0] == ' ' {
 135             s = s[1:]
 136         }
 137 
 138         if len(s) == 0 {
 139             return
 140         }
 141 
 142         if i+1 == max {
 143             f(i, s)
 144             return
 145         }
 146 
 147         if j := strings.IndexByte(s, ' '); j >= 0 {
 148             f(i, s[:j])
 149             s = s[j+1:]
 150             continue
 151         }
 152 
 153         f(i, s)
 154         return
 155     }
 156 }
 157 
 158 // loopItemsTSV loops over a line's tab-separated items, allocation-free style;
 159 // when given empty strings, the callback func is never called
 160 func loopItemsTSV(s string, max int, f func(i int, s string)) {
 161     if len(s) == 0 {
 162         return
 163     }
 164 
 165     for i := 0; true; i++ {
 166         if i+1 == max {
 167             f(i, s)
 168             return
 169         }
 170 
 171         if j := strings.IndexByte(s, '\t'); j >= 0 {
 172             f(i, s[:j])
 173             s = s[j+1:]
 174             continue
 175         }
 176 
 177         f(i, s)
 178         return
 179     }
 180 }
 181 
 182 func skipSingleLeadingANSI(s string) string {
 183     if len(s) < 3 || s[0] != '\x1b' || s[1] != '[' {
 184         return s
 185     }
 186 
 187     s = s[2:]
 188 
 189     for len(s) > 0 {
 190         b := s[0]
 191         s = s[1:]
 192         if ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z') {
 193             break
 194         }
 195     }
 196 
 197     return s
 198 }
 199 
 200 func skipLeadingANSI(s string) string {
 201     prev := len(s)
 202 
 203     for len(s) > 0 {
 204         s = skipSingleLeadingANSI(s)
 205         if len(s) == prev {
 206             return s
 207         }
 208         prev = len(s)
 209     }
 210 
 211     return s
 212 }
 213 
 214 // isNumeric checks if a string is a valid/useable float64, which excludes
 215 // NaNs and the infinities
 216 // func isNumeric(s string) bool {
 217 //  f, err := strconv.ParseFloat(s, 64)
 218 //  return err == nil && !math.IsNaN(f) && !math.IsInf(f, 0)
 219 // }
 220 
 221 func isDigits(s string, sb *strings.Builder) bool {
 222     if len(s) == 0 {
 223         return false
 224     }
 225 
 226     digits := 0
 227 
 228     for {
 229         s = skipLeadingANSI(s)
 230         if len(s) == 0 {
 231             break
 232         }
 233 
 234         if b := s[0]; '0' <= b && b <= '9' {
 235             sb.WriteByte(b)
 236             s = s[1:]
 237             digits++
 238         } else {
 239             return false
 240         }
 241     }
 242 
 243     s = skipLeadingANSI(s)
 244     return len(s) == 0 && digits > 0
 245 }
 246 
 247 // isNumeric checks if a string is valid/useable as a number
 248 func isNumeric(s string, sb *strings.Builder) bool {
 249     if len(s) == 0 {
 250         return false
 251     }
 252 
 253     sb.Reset()
 254 
 255     s = skipLeadingANSI(s)
 256     if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
 257         sb.WriteByte(s[0])
 258         s = s[1:]
 259     }
 260 
 261     s = skipLeadingANSI(s)
 262     if len(s) == 0 {
 263         return false
 264     }
 265     if b := s[0]; b == '.' {
 266         sb.WriteByte(b)
 267         return isDigits(s[1:], sb)
 268     }
 269 
 270     digits := 0
 271 
 272     for {
 273         s = skipLeadingANSI(s)
 274         if len(s) == 0 {
 275             break
 276         }
 277 
 278         b := s[0]
 279         sb.WriteByte(b)
 280 
 281         if b == '.' {
 282             return isDigits(s[1:], sb)
 283         }
 284 
 285         if !('0' <= b && b <= '9') {
 286             return false
 287         }
 288 
 289         digits++
 290         s = s[1:]
 291     }
 292 
 293     s = skipLeadingANSI(s)
 294     return len(s) == 0 && digits > 0
 295 }
 296 
 297 // // countDecimals counts decimal digits from the string given, assuming it
 298 // // represents a valid/useable float64, when parsed
 299 // func countDecimals(s string) int {
 300 //  if dot := strings.IndexByte(s, '.'); dot >= 0 {
 301 //      return len(s) - dot - 1
 302 //  }
 303 //  return 0
 304 // }
 305 
 306 // countDecimals counts decimal digits from the string given, assuming it
 307 // represents a valid/useable float64, when parsed
 308 func countDecimals(s string) int {
 309     dot := strings.IndexByte(s, '.')
 310     if dot < 0 {
 311         return 0
 312     }
 313 
 314     decs := 0
 315     s = s[dot+1:]
 316 
 317     for len(s) > 0 {
 318         s = skipLeadingANSI(s)
 319         if len(s) == 0 {
 320             break
 321         }
 322         if '0' <= s[0] && s[0] <= '9' {
 323             decs++
 324         }
 325         s = s[1:]
 326     }
 327 
 328     return decs
 329 }
 330 
 331 // countDotDecimals is like func countDecimals, but this one also includes
 332 // the dot, when any decimals are present, else the count stays at 0
 333 func countDotDecimals(s string) int {
 334     decs := countDecimals(s)
 335     if decs > 0 {
 336         return decs + 1
 337     }
 338     return decs
 339 }
 340 
 341 // func countWidth(s string) int{
 342 //  return utf8.RuneCountInString(s)
 343 // }
 344 
 345 func countWidth(s string) int {
 346     c := 0
 347     for len(s) > 0 {
 348         i := strings.Index(s, "\x1b[")
 349         if i < 0 {
 350             c += utf8.RuneCountInString(s)
 351             return c
 352         }
 353 
 354         c += utf8.RuneCountInString(s[:i])
 355         s = s[i+2:]
 356 
 357         for len(s) > 0 {
 358             b := s[0]
 359             s = s[1:]
 360             if ('A' <= b && b <= 'Z') || ('a' <= b && b <= 'z') {
 361                 break
 362             }
 363         }
 364     }
 365 
 366     return c
 367 }
 368 
 369 // table has all summary info gathered from the data, along with the row
 370 // themselves, stored as lines/strings
 371 type table struct {
 372     Columns int
 373 
 374     Rows []string
 375 
 376     MaxWidth []int
 377 
 378     MaxDotDecimals []int
 379 
 380     Numeric []int
 381 
 382     Sums []float64
 383 
 384     LoopItems func(line string, items int, f func(i int, s string))
 385 }
 386 
 387 func (t *table) update(line string) {
 388     if len(line) == 0 {
 389         return
 390     }
 391 
 392     t.Rows = append(t.Rows, line)
 393 
 394     if t.LoopItems == nil {
 395         if strings.ContainsRune(line, '\t') {
 396             t.LoopItems = loopItemsTSV
 397         } else {
 398             t.LoopItems = loopItemsSSV
 399         }
 400         t.LoopItems(line, t.Columns, func(i int, s string) {
 401             t.Columns++
 402         })
 403     }
 404 
 405     var sb strings.Builder
 406 
 407     t.LoopItems(line, t.Columns, func(i int, s string) {
 408         // ensure column-info-slices have enough room
 409         if i >= len(t.MaxWidth) {
 410             t.MaxWidth = append(t.MaxWidth, 0)
 411             t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
 412             t.Numeric = append(t.Numeric, 0)
 413             t.Sums = append(t.Sums, 0)
 414         }
 415 
 416         // keep track of widest rune-counts for each column
 417         w := countWidth(s)
 418         if t.MaxWidth[i] < w {
 419             t.MaxWidth[i] = w
 420         }
 421 
 422         // update stats for numeric items
 423         if isNumeric(s, &sb) {
 424             dd := countDotDecimals(s)
 425             if t.MaxDotDecimals[i] < dd {
 426                 t.MaxDotDecimals[i] = dd
 427             }
 428 
 429             t.Numeric[i]++
 430             f, _ := strconv.ParseFloat(sb.String(), 64)
 431             t.Sums[i] += f
 432         }
 433     })
 434 }
 435 
// realign emits all rows from the table given, each starting with its tiles
// followed by its items padded into aligned columns, then ends with a row
// of per-column sums; nothing is emitted after the first failed line-feed
// write, and the sums row is skipped for tables with no columns
func realign(w *bufio.Writer, t table) {
    var sb strings.Builder
    sums := make([]string, 0, t.Columns)

    // format the sum for each column up front, since sums can widen their
    // columns: columns with no numeric items show a dash instead
    for i := 0; i < t.Columns; i++ {
        s := `-`
        width := 1

        if t.Numeric[i] > 0 {
            // dot-decimals counts include the dot, unlike the decimals
            // count func strconv.AppendFloat needs
            decs := t.MaxDotDecimals[i]
            if decs > 0 {
                decs--
            }

            var buf [64]byte
            s = string(strconv.AppendFloat(buf[:0], t.Sums[i], 'f', decs, 64))
            width = len(s)
        }

        // the table is a copy, but its slices still share their items with
        // the caller's table
        if t.MaxWidth[i] < width {
            t.MaxWidth[i] = width
        }
        sums = append(sums, s)
    }

    for _, line := range t.Rows {
        // due is how many spaces to emit before the next item: these are
        // delayed to avoid trailing spaces at the end of lines
        due := 0

        writeRowTiles(w, line, t, &sb)

        t.LoopItems(line, t.Columns, func(i int, s string) {
            // columns are separated by 2 spaces
            due += 2

            if isNumeric(s, &sb) {
                // numbers are right-aligned, except for the right-padding
                // which stands in for missing decimals, so dots line up
                // NOTE(review): when width + rpad exceeds the column's max
                // width (plus due), lpad goes negative and no padding is
                // emitted, so this item seems able to overflow its column
                dd := countDotDecimals(s)
                rpad := t.MaxDotDecimals[i] - dd
                width := countWidth(s)
                lpad := t.MaxWidth[i] - (width + rpad) + due
                writeSpaces(w, lpad)
                f, _ := strconv.ParseFloat(sb.String(), 64)
                writeNumericItem(w, s, numericStyle(f))
                due = rpad
                return
            }

            // non-numeric items are left-aligned, and emitted as given
            writeSpaces(w, due)
            w.WriteString(s)
            due = t.MaxWidth[i] - countWidth(s)
        })

        if w.WriteByte('\n') != nil {
            return
        }
    }

    if t.Columns == 0 {
        return
    }

    // the sums row starts with 1 space per column, where the other rows
    // start with the output from func writeRowTiles
    due := 0
    writeSpaces(w, t.Columns)

    for i, s := range sums {
        due += 2

        if t.Numeric[i] == 0 {
            writeSpaces(w, due)
            w.WriteString(s)
            due = t.MaxWidth[i] - countWidth(s)
            continue
        }

        // sums are formatted with all the decimals their columns can have,
        // so they never need any right-padding
        lpad := t.MaxWidth[i] - len(s) + due
        writeSpaces(w, lpad)
        writeNumericItem(w, s, numericStyle(t.Sums[i]))
        due = 0
    }

    w.WriteByte('\n')
}
 516 
 517 // writeSpaces does what it says, minimizing calls to write-like funcs
 518 func writeSpaces(w *bufio.Writer, n int) {
 519     const spaces = `                                `
 520     if n < 1 {
 521         return
 522     }
 523 
 524     for n >= len(spaces) {
 525         w.WriteString(spaces)
 526         n -= len(spaces)
 527     }
 528     w.WriteString(spaces[:n])
 529 }
 530 
 531 func writeRowTiles(w *bufio.Writer, row string, t table, sb *strings.Builder) {
 532     end := 0
 533     t.LoopItems(row, t.Columns, func(i int, s string) {
 534         writeTile(w, s, sb)
 535         end = i
 536     })
 537 
 538     if end < len(t.MaxWidth)-1 {
 539         w.WriteString("\x1b[0m")
 540     }
 541     for i := end + 1; i < len(t.MaxWidth); i++ {
 542         w.WriteString("×")
 543     }
 544     w.WriteString("\x1b[0m")
 545 }
 546 
 547 func writeTile(w *bufio.Writer, s string, sb *strings.Builder) {
 548     if len(s) == 0 {
 549         w.WriteString("\x1b[0m○")
 550         return
 551     }
 552 
 553     if isNumeric(s, sb) {
 554         f, _ := strconv.ParseFloat(sb.String(), 64)
 555         w.WriteString(numericStyle(f))
 556         w.WriteString("")
 557         return
 558     }
 559 
 560     if s[0] == ' ' || s[len(s)-1] == ' ' {
 561         w.WriteString("\x1b[38;2;196;160;0m■")
 562         return
 563     }
 564 
 565     w.WriteString("\x1b[38;2;128;128;128m■")
 566 }
 567 
 568 func numericStyle(f float64) string {
 569     if f > 0 {
 570         if float64(int64(f)) == f {
 571             return "\x1b[38;2;0;135;0m"
 572         }
 573         return "\x1b[38;2;0;155;95m"
 574     }
 575     if f < 0 {
 576         if float64(int64(f)) == f {
 577             return "\x1b[38;2;204;0;0m"
 578         }
 579         return "\x1b[38;2;215;95;95m"
 580     }
 581     if f == 0 {
 582         return "\x1b[38;2;0;95;215m"
 583     }
 584     return "\x1b[38;2;128;128;128m"
 585 }
 586 
 587 func writeNumericItem(w *bufio.Writer, s string, startStyle string) {
 588     w.WriteString(startStyle)
 589     if len(s) > 0 && (s[0] == '-' || s[0] == '+') {
 590         w.WriteByte(s[0])
 591         s = s[1:]
 592     }
 593 
 594     dot := strings.IndexByte(s, '.')
 595     if dot < 0 {
 596         restyleDigits(w, s, altDigitStyle)
 597         w.WriteString("\x1b[0m")
 598         return
 599     }
 600 
 601     if len(s[:dot]) > 3 {
 602         restyleDigits(w, s[:dot], altDigitStyle)
 603         w.WriteString("\x1b[0m")
 604         w.WriteString(startStyle)
 605         w.WriteByte('.')
 606     } else {
 607         w.WriteString(s[:dot])
 608         w.WriteByte('.')
 609     }
 610 
 611     rest := s[dot+1:]
 612     restyleDigits(w, rest, altDigitStyle)
 613     if len(rest) < 4 {
 614         w.WriteString("\x1b[0m")
 615     }
 616 }
 617 
 618 // restyleDigits renders a run of digits as alternating styled/unstyled runs
 619 // of 3 digits, which greatly improves readability, and is the only purpose
 620 // of this app; string is assumed to be all decimal digits
 621 func restyleDigits(w *bufio.Writer, digits string, altStyle string) {
 622     if len(digits) < 4 {
 623         // digit sequence is short, so emit it as is
 624         w.WriteString(digits)
 625         return
 626     }
 627 
 628     // separate leading 0..2 digits which don't align with the 3-digit groups
 629     i := len(digits) % 3
 630     // emit leading digits unstyled, if there are any
 631     w.WriteString(digits[:i])
 632     // the rest is guaranteed to have a length which is a multiple of 3
 633     digits = digits[i:]
 634 
 635     // start by styling, unless there were no leading digits
 636     style := i != 0
 637 
 638     for len(digits) > 0 {
 639         if style {
 640             w.WriteString(altStyle)
 641             w.WriteString(digits[:3])
 642             w.WriteString("\x1b[0m")
 643         } else {
 644             w.WriteString(digits[:3])
 645         }
 646 
 647         // advance to the next triple: the start of this func is supposed
 648         // to guarantee this step always works
 649         digits = digits[3:]
 650 
 651         // alternate between styled and unstyled 3-digit groups
 652         style = !style
 653     }
 654 }