File: realign.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath realign.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "errors"
  37     "io"
  38     "os"
  39     "strings"
  40     "unicode/utf8"
  41 )
  42 
  43 const info = `
  44 realign [options...] [filenames...]
  45 
  46 Realign all detected columns, right-aligning any detected numbers in any
  47 column. ANSI style-codes are also kept as given.
  48 
  49 The options are, available both in single and double-dash versions
  50 
  51     -h, -help          show this help message
  52     -m, max-columns    use the row with the most items for the item-count
  53 `
  54 
  55 func main() {
  56     maxWidth := false
  57     args := os.Args[1:]
  58 
  59     for len(args) > 0 {
  60         if args[0] == `--` {
  61             args = args[1:]
  62             break
  63         }
  64 
  65         switch args[0] {
  66         case `-h`, `--h`, `-help`, `--help`:
  67             os.Stdout.WriteString(info[1:])
  68             return
  69 
  70         case
  71             `-m`, `--m`,
  72             `-maxcols`, `--maxcols`,
  73             `-max-columns`, `--max-columns`:
  74             maxWidth = true
  75             args = args[1:]
  76             continue
  77         }
  78     }
  79 
  80     if err := run(args, maxWidth); err != nil {
  81         os.Stderr.WriteString(err.Error())
  82         os.Stderr.WriteString("\n")
  83         os.Exit(1)
  84         return
  85     }
  86 }
  87 
  88 // table has all summary info gathered from the data, along with the row
  89 // themselves, stored as lines/strings
  90 type table struct {
  91     Columns int
  92 
  93     Rows []string
  94 
  95     MaxWidth []int
  96 
  97     MaxDotDecimals []int
  98 
  99     LoopItems func(s string, max int, t *table, f itemFunc)
 100 
 101     MaxColumns bool
 102 }
 103 
 104 type itemFunc func(i int, s string, t *table)
 105 
 106 func run(paths []string, maxCols bool) error {
 107     var res table
 108     res.MaxColumns = maxCols
 109 
 110     for _, p := range paths {
 111         if err := handleFile(&res, p); err != nil {
 112             return err
 113         }
 114     }
 115 
 116     if len(paths) == 0 {
 117         if err := handleReader(&res, os.Stdin); err != nil {
 118             return err
 119         }
 120     }
 121 
 122     bw := bufio.NewWriterSize(os.Stdout, 32*1024)
 123     defer bw.Flush()
 124     realign(bw, res)
 125     return nil
 126 }
 127 
 128 func handleFile(res *table, path string) error {
 129     f, err := os.Open(path)
 130     if err != nil {
 131         // on windows, file-not-found error messages may mention `CreateFile`,
 132         // even when trying to open files in read-only mode
 133         return errors.New(`can't open file named ` + path)
 134     }
 135     defer f.Close()
 136     return handleReader(res, f)
 137 }
 138 
 139 func handleReader(t *table, r io.Reader) error {
 140     const gb = 1024 * 1024 * 1024
 141     sc := bufio.NewScanner(r)
 142     sc.Buffer(nil, 8*gb)
 143 
 144     const maxInt = int(^uint(0) >> 1)
 145     maxCols := maxInt
 146 
 147     for i := 0; sc.Scan(); i++ {
 148         s := sc.Text()
 149         if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
 150             s = s[3:]
 151         }
 152 
 153         if len(s) == 0 {
 154             if len(t.Rows) > 0 {
 155                 t.Rows = append(t.Rows, ``)
 156             }
 157             continue
 158         }
 159 
 160         t.Rows = append(t.Rows, s)
 161 
 162         if t.Columns == 0 {
 163             if t.LoopItems == nil {
 164                 if strings.IndexByte(s, '\t') >= 0 {
 165                     t.LoopItems = loopItemsTSV
 166                 } else {
 167                     t.LoopItems = loopItemsSSV
 168                 }
 169             }
 170 
 171             if !t.MaxColumns {
 172                 t.LoopItems(s, maxCols, t, updateColumnCount)
 173                 maxCols = t.Columns
 174             }
 175         }
 176 
 177         t.LoopItems(s, maxCols, t, updateItem)
 178     }
 179 
 180     return sc.Err()
 181 }
 182 
 183 func updateColumnCount(i int, s string, t *table) {
 184     t.Columns = i + 1
 185 }
 186 
 187 func updateItem(i int, s string, t *table) {
 188     // ensure column-info-slices have enough room
 189     if i >= len(t.MaxWidth) {
 190         if t.MaxColumns {
 191             t.Columns = i + 1
 192         }
 193         t.MaxWidth = append(t.MaxWidth, 0)
 194         t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
 195     }
 196 
 197     // keep track of widest rune-counts for each column
 198     w := countWidth(s)
 199     if t.MaxWidth[i] < w {
 200         t.MaxWidth[i] = w
 201     }
 202 
 203     // update stats for numeric items
 204     if isNumeric(s) {
 205         dd := countDotDecimals(s)
 206         if t.MaxDotDecimals[i] < dd {
 207             t.MaxDotDecimals[i] = dd
 208         }
 209     }
 210 }
 211 
 212 // loopItemsSSV loops over a line's items, allocation-free style; when given
 213 // empty strings, the callback func is never called
 214 func loopItemsSSV(s string, max int, t *table, f itemFunc) {
 215     s = trimTrailingSpaces(s)
 216 
 217     for i := 0; true; i++ {
 218         s = trimLeadingSpaces(s)
 219         if len(s) == 0 {
 220             return
 221         }
 222 
 223         if i+1 == max {
 224             f(i, s, t)
 225             return
 226         }
 227 
 228         j := strings.IndexByte(s, ' ')
 229         if j < 0 {
 230             f(i, s, t)
 231             return
 232         }
 233 
 234         f(i, s[:j], t)
 235         s = s[j+1:]
 236     }
 237 }
 238 
 239 func trimLeadingSpaces(s string) string {
 240     for len(s) > 0 && s[0] == ' ' {
 241         s = s[1:]
 242     }
 243     return s
 244 }
 245 
 246 func trimTrailingSpaces(s string) string {
 247     for len(s) > 0 && s[len(s)-1] == ' ' {
 248         s = s[:len(s)-1]
 249     }
 250     return s
 251 }
 252 
 253 // loopItemsTSV loops over a line's tab-separated items, allocation-free style;
 254 // when given empty strings, the callback func is never called
 255 func loopItemsTSV(s string, max int, t *table, f itemFunc) {
 256     if len(s) == 0 {
 257         return
 258     }
 259 
 260     for i := 0; true; i++ {
 261         if i+1 == max {
 262             f(i, s, t)
 263             return
 264         }
 265 
 266         j := strings.IndexByte(s, '\t')
 267         if j < 0 {
 268             f(i, s, t)
 269             return
 270         }
 271 
 272         f(i, s[:j], t)
 273         s = s[j+1:]
 274     }
 275 }
 276 
 277 func skipLeadingEscapeSequences(s string) string {
 278     for len(s) >= 2 {
 279         if s[0] != '\x1b' {
 280             return s
 281         }
 282 
 283         switch s[1] {
 284         case '[':
 285             s = skipSingleLeadingANSI(s[2:])
 286 
 287         case ']':
 288             if len(s) < 3 || s[2] != '8' {
 289                 return s
 290             }
 291             s = skipSingleLeadingOSC(s[3:])
 292 
 293         default:
 294             return s
 295         }
 296     }
 297 
 298     return s
 299 }
 300 
 301 func skipSingleLeadingANSI(s string) string {
 302     for len(s) > 0 {
 303         upper := s[0] &^ 32
 304         s = s[1:]
 305         if 'A' <= upper && upper <= 'Z' {
 306             break
 307         }
 308     }
 309 
 310     return s
 311 }
 312 
 313 func skipSingleLeadingOSC(s string) string {
 314     var prev byte
 315 
 316     for len(s) > 0 {
 317         b := s[0]
 318         s = s[1:]
 319         if prev == '\x1b' && b == '\\' {
 320             break
 321         }
 322         prev = b
 323     }
 324 
 325     return s
 326 }
 327 
 328 // isNumeric checks if a string is valid/useable as a number
 329 func isNumeric(s string) bool {
 330     if len(s) == 0 {
 331         return false
 332     }
 333 
 334     s = skipLeadingEscapeSequences(s)
 335     if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
 336         s = s[1:]
 337     }
 338 
 339     s = skipLeadingEscapeSequences(s)
 340     if len(s) == 0 {
 341         return false
 342     }
 343     if s[0] == '.' {
 344         return isDigits(s[1:])
 345     }
 346 
 347     digits := 0
 348 
 349     for {
 350         s = skipLeadingEscapeSequences(s)
 351         if len(s) == 0 {
 352             break
 353         }
 354 
 355         if s[0] == '.' {
 356             return isDigits(s[1:])
 357         }
 358 
 359         if !('0' <= s[0] && s[0] <= '9') {
 360             return false
 361         }
 362 
 363         digits++
 364         s = s[1:]
 365     }
 366 
 367     s = skipLeadingEscapeSequences(s)
 368     return len(s) == 0 && digits > 0
 369 }
 370 
 371 func isDigits(s string) bool {
 372     if len(s) == 0 {
 373         return false
 374     }
 375 
 376     digits := 0
 377 
 378     for {
 379         s = skipLeadingEscapeSequences(s)
 380         if len(s) == 0 {
 381             break
 382         }
 383 
 384         if '0' <= s[0] && s[0] <= '9' {
 385             s = s[1:]
 386             digits++
 387         } else {
 388             return false
 389         }
 390     }
 391 
 392     s = skipLeadingEscapeSequences(s)
 393     return len(s) == 0 && digits > 0
 394 }
 395 
 396 // countDecimals counts decimal digits from the string given, assuming it
 397 // represents a valid/useable float64, when parsed
 398 func countDecimals(s string) int {
 399     dot := strings.IndexByte(s, '.')
 400     if dot < 0 {
 401         return 0
 402     }
 403 
 404     decs := 0
 405     s = s[dot+1:]
 406 
 407     for len(s) > 0 {
 408         s = skipLeadingEscapeSequences(s)
 409         if len(s) == 0 {
 410             break
 411         }
 412         if '0' <= s[0] && s[0] <= '9' {
 413             decs++
 414         }
 415         s = s[1:]
 416     }
 417 
 418     return decs
 419 }
 420 
 421 // countDotDecimals is like func countDecimals, but this one also includes
 422 // the dot, when any decimals are present, else the count stays at 0
 423 func countDotDecimals(s string) int {
 424     decs := countDecimals(s)
 425     if decs > 0 {
 426         return decs + 1
 427     }
 428     return decs
 429 }
 430 
 431 func countWidth(s string) int {
 432     width := 0
 433 
 434     for len(s) > 0 {
 435         i, j := indexEscapeSequence(s)
 436         if i < 0 {
 437             break
 438         }
 439         if j < 0 {
 440             j = len(s)
 441         }
 442 
 443         width += utf8.RuneCountInString(s[:i])
 444         s = s[j:]
 445     }
 446 
 447     // count trailing/all runes in strings which don't end with ANSI-sequences
 448     width += utf8.RuneCountInString(s)
 449     return width
 450 }
 451 
 452 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 453 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 454 // indices which can be independently negative when either the start/end of
 455 // a sequence isn't found; given their fairly-common use, even the hyperlink
 456 // ESC]8 sequences are supported
 457 func indexEscapeSequence(s string) (int, int) {
 458     var prev byte
 459 
 460     for i := range s {
 461         b := s[i]
 462 
 463         if prev == '\x1b' && b == '[' {
 464             j := indexLetter(s[i+1:])
 465             if j < 0 {
 466                 return i, -1
 467             }
 468             return i - 1, i + 1 + j + 1
 469         }
 470 
 471         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 472             j := indexPair(s[i+1:], '\x1b', '\\')
 473             if j < 0 {
 474                 return i, -1
 475             }
 476             return i - 1, i + 1 + j + 2
 477         }
 478 
 479         prev = b
 480     }
 481 
 482     return -1, -1
 483 }
 484 
 485 func indexLetter(s string) int {
 486     for i, b := range s {
 487         upper := b &^ 32
 488         if 'A' <= upper && upper <= 'Z' {
 489             return i
 490         }
 491     }
 492 
 493     return -1
 494 }
 495 
 496 func indexPair(s string, x byte, y byte) int {
 497     var prev byte
 498 
 499     for i := range s {
 500         b := s[i]
 501         if prev == x && b == y && i > 0 {
 502             return i
 503         }
 504         prev = b
 505     }
 506 
 507     return -1
 508 }
 509 
 510 func realign(w *bufio.Writer, t table) {
 511     due := 0
 512     showItem := func(i int, s string, t *table) {
 513         if i > 0 {
 514             due += 2
 515         }
 516 
 517         if isNumeric(s) {
 518             dd := countDotDecimals(s)
 519             rpad := t.MaxDotDecimals[i] - dd
 520             width := countWidth(s)
 521             lpad := t.MaxWidth[i] - (width + rpad) + due
 522             writeSpaces(w, lpad)
 523             w.WriteString(s)
 524             due = rpad
 525             return
 526         }
 527 
 528         writeSpaces(w, due)
 529         w.WriteString(s)
 530         due = t.MaxWidth[i] - countWidth(s)
 531     }
 532 
 533     for _, line := range t.Rows {
 534         due = 0
 535         if len(line) > 0 {
 536             t.LoopItems(line, t.Columns, &t, showItem)
 537         }
 538         if w.WriteByte('\n') != nil {
 539             break
 540         }
 541     }
 542 }
 543 
 544 // writeSpaces does what it says, minimizing calls to write-like funcs
 545 func writeSpaces(w *bufio.Writer, n int) {
 546     const spaces = `                                `
 547     if n < 1 {
 548         return
 549     }
 550 
 551     for n >= len(spaces) {
 552         w.WriteString(spaces)
 553         n -= len(spaces)
 554     }
 555     w.WriteString(spaces[:n])
 556 }