File: realign.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath realign.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "errors"
  37     "io"
  38     "os"
  39     "strings"
  40     "unicode/utf8"
  41 )
  42 
  43 const info = `
  44 realign [options...] [filenames...]
  45 
  46 Realign all detected columns, right-aligning any detected numbers in any
  47 column. ANSI style-codes are also kept as given.
  48 
  49 The options are, available both in single and double-dash versions
  50 
  51     -h, -help          show this help message
  52     -m, max-columns    use the row with the most items for the item-count
  53 `
  54 
  55 func main() {
  56     maxWidth := false
  57     args := os.Args[1:]
  58 
  59     for len(args) > 0 {
  60         if args[0] == `--` {
  61             args = args[1:]
  62             break
  63         }
  64 
  65         switch args[0] {
  66         case `-h`, `--h`, `-help`, `--help`:
  67             os.Stdout.WriteString(info[1:])
  68             return
  69 
  70         case
  71             `-m`, `--m`,
  72             `-maxcols`, `--maxcols`,
  73             `-max-columns`, `--max-columns`:
  74             maxWidth = true
  75             args = args[1:]
  76             continue
  77         }
  78     }
  79 
  80     if err := run(args, maxWidth); err != nil {
  81         os.Stderr.WriteString(err.Error())
  82         os.Stderr.WriteString("\n")
  83         os.Exit(1)
  84     }
  85 }
  86 
  87 // table has all summary info gathered from the data, along with the row
  88 // themselves, stored as lines/strings
  89 type table struct {
  90     Columns int
  91 
  92     Rows []string
  93 
  94     MaxWidth []int
  95 
  96     MaxDotDecimals []int
  97 
  98     LoopItems func(s string, max int, t *table, f itemFunc)
  99 
 100     MaxColumns bool
 101 }
 102 
 103 type itemFunc func(i int, s string, t *table)
 104 
 105 func run(paths []string, maxCols bool) error {
 106     var res table
 107     res.MaxColumns = maxCols
 108 
 109     for _, p := range paths {
 110         if err := handleFile(&res, p); err != nil {
 111             return err
 112         }
 113     }
 114 
 115     if len(paths) == 0 {
 116         if err := handleReader(&res, os.Stdin); err != nil {
 117             return err
 118         }
 119     }
 120 
 121     bw := bufio.NewWriterSize(os.Stdout, 32*1024)
 122     defer bw.Flush()
 123     realign(bw, res)
 124     return nil
 125 }
 126 
 127 func handleFile(res *table, path string) error {
 128     f, err := os.Open(path)
 129     if err != nil {
 130         // on windows, file-not-found error messages may mention `CreateFile`,
 131         // even when trying to open files in read-only mode
 132         return errors.New(`can't open file named ` + path)
 133     }
 134     defer f.Close()
 135     return handleReader(res, f)
 136 }
 137 
 138 func handleReader(t *table, r io.Reader) error {
 139     const gb = 1024 * 1024 * 1024
 140     sc := bufio.NewScanner(r)
 141     sc.Buffer(nil, 8*gb)
 142 
 143     const maxInt = int(^uint(0) >> 1)
 144     maxCols := maxInt
 145 
 146     for i := 0; sc.Scan(); i++ {
 147         s := sc.Text()
 148         if i == 0 && strings.HasPrefix(s, "\xef\xbb\xbf") {
 149             s = s[3:]
 150         }
 151 
 152         if len(s) == 0 {
 153             if len(t.Rows) > 0 {
 154                 t.Rows = append(t.Rows, ``)
 155             }
 156             continue
 157         }
 158 
 159         t.Rows = append(t.Rows, s)
 160 
 161         if t.Columns == 0 {
 162             if t.LoopItems == nil {
 163                 if strings.IndexByte(s, '\t') >= 0 {
 164                     t.LoopItems = loopItemsTSV
 165                 } else {
 166                     t.LoopItems = loopItemsSSV
 167                 }
 168             }
 169 
 170             if !t.MaxColumns {
 171                 t.LoopItems(s, maxCols, t, updateColumnCount)
 172                 maxCols = t.Columns
 173             }
 174         }
 175 
 176         t.LoopItems(s, maxCols, t, updateItem)
 177     }
 178 
 179     return sc.Err()
 180 }
 181 
 182 func updateColumnCount(i int, s string, t *table) {
 183     t.Columns = i + 1
 184 }
 185 
 186 func updateItem(i int, s string, t *table) {
 187     // ensure column-info-slices have enough room
 188     if i >= len(t.MaxWidth) {
 189         if t.MaxColumns {
 190             t.Columns = i + 1
 191         }
 192         t.MaxWidth = append(t.MaxWidth, 0)
 193         t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
 194     }
 195 
 196     // keep track of widest rune-counts for each column
 197     w := countWidth(s)
 198     if t.MaxWidth[i] < w {
 199         t.MaxWidth[i] = w
 200     }
 201 
 202     // update stats for numeric items
 203     if isNumeric(s) {
 204         dd := countDotDecimals(s)
 205         if t.MaxDotDecimals[i] < dd {
 206             t.MaxDotDecimals[i] = dd
 207         }
 208     }
 209 }
 210 
 211 // loopItemsSSV loops over a line's items, allocation-free style; when given
 212 // empty strings, the callback func is never called
 213 func loopItemsSSV(s string, max int, t *table, f itemFunc) {
 214     s = trimTrailingSpaces(s)
 215 
 216     for i := 0; true; i++ {
 217         s = trimLeadingSpaces(s)
 218         if len(s) == 0 {
 219             return
 220         }
 221 
 222         if i+1 == max {
 223             f(i, s, t)
 224             return
 225         }
 226 
 227         j := strings.IndexByte(s, ' ')
 228         if j < 0 {
 229             f(i, s, t)
 230             return
 231         }
 232 
 233         f(i, s[:j], t)
 234         s = s[j+1:]
 235     }
 236 }
 237 
 238 func trimLeadingSpaces(s string) string {
 239     for len(s) > 0 && s[0] == ' ' {
 240         s = s[1:]
 241     }
 242     return s
 243 }
 244 
 245 func trimTrailingSpaces(s string) string {
 246     for len(s) > 0 && s[len(s)-1] == ' ' {
 247         s = s[:len(s)-1]
 248     }
 249     return s
 250 }
 251 
 252 // loopItemsTSV loops over a line's tab-separated items, allocation-free style;
 253 // when given empty strings, the callback func is never called
 254 func loopItemsTSV(s string, max int, t *table, f itemFunc) {
 255     if len(s) == 0 {
 256         return
 257     }
 258 
 259     for i := 0; true; i++ {
 260         if i+1 == max {
 261             f(i, s, t)
 262             return
 263         }
 264 
 265         j := strings.IndexByte(s, '\t')
 266         if j < 0 {
 267             f(i, s, t)
 268             return
 269         }
 270 
 271         f(i, s[:j], t)
 272         s = s[j+1:]
 273     }
 274 }
 275 
 276 func skipLeadingEscapeSequences(s string) string {
 277     for len(s) >= 2 {
 278         if s[0] != '\x1b' {
 279             return s
 280         }
 281 
 282         switch s[1] {
 283         case '[':
 284             s = skipSingleLeadingANSI(s[2:])
 285 
 286         case ']':
 287             if len(s) < 3 || s[2] != '8' {
 288                 return s
 289             }
 290             s = skipSingleLeadingOSC(s[3:])
 291 
 292         default:
 293             return s
 294         }
 295     }
 296 
 297     return s
 298 }
 299 
 300 func skipSingleLeadingANSI(s string) string {
 301     for len(s) > 0 {
 302         upper := s[0] &^ 32
 303         s = s[1:]
 304         if 'A' <= upper && upper <= 'Z' {
 305             break
 306         }
 307     }
 308 
 309     return s
 310 }
 311 
 312 func skipSingleLeadingOSC(s string) string {
 313     var prev byte
 314 
 315     for len(s) > 0 {
 316         b := s[0]
 317         s = s[1:]
 318         if prev == '\x1b' && b == '\\' {
 319             break
 320         }
 321         prev = b
 322     }
 323 
 324     return s
 325 }
 326 
 327 // isNumeric checks if a string is valid/useable as a number
 328 func isNumeric(s string) bool {
 329     if len(s) == 0 {
 330         return false
 331     }
 332 
 333     s = skipLeadingEscapeSequences(s)
 334     if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
 335         s = s[1:]
 336     }
 337 
 338     s = skipLeadingEscapeSequences(s)
 339     if len(s) == 0 {
 340         return false
 341     }
 342     if s[0] == '.' {
 343         return isDigits(s[1:])
 344     }
 345 
 346     digits := 0
 347 
 348     for {
 349         s = skipLeadingEscapeSequences(s)
 350         if len(s) == 0 {
 351             break
 352         }
 353 
 354         if s[0] == '.' {
 355             return isDigits(s[1:])
 356         }
 357 
 358         if !('0' <= s[0] && s[0] <= '9') {
 359             return false
 360         }
 361 
 362         digits++
 363         s = s[1:]
 364     }
 365 
 366     s = skipLeadingEscapeSequences(s)
 367     return len(s) == 0 && digits > 0
 368 }
 369 
 370 func isDigits(s string) bool {
 371     if len(s) == 0 {
 372         return false
 373     }
 374 
 375     digits := 0
 376 
 377     for {
 378         s = skipLeadingEscapeSequences(s)
 379         if len(s) == 0 {
 380             break
 381         }
 382 
 383         if '0' <= s[0] && s[0] <= '9' {
 384             s = s[1:]
 385             digits++
 386         } else {
 387             return false
 388         }
 389     }
 390 
 391     s = skipLeadingEscapeSequences(s)
 392     return len(s) == 0 && digits > 0
 393 }
 394 
 395 // countDecimals counts decimal digits from the string given, assuming it
 396 // represents a valid/useable float64, when parsed
 397 func countDecimals(s string) int {
 398     dot := strings.IndexByte(s, '.')
 399     if dot < 0 {
 400         return 0
 401     }
 402 
 403     decs := 0
 404     s = s[dot+1:]
 405 
 406     for len(s) > 0 {
 407         s = skipLeadingEscapeSequences(s)
 408         if len(s) == 0 {
 409             break
 410         }
 411         if '0' <= s[0] && s[0] <= '9' {
 412             decs++
 413         }
 414         s = s[1:]
 415     }
 416 
 417     return decs
 418 }
 419 
 420 // countDotDecimals is like func countDecimals, but this one also includes
 421 // the dot, when any decimals are present, else the count stays at 0
 422 func countDotDecimals(s string) int {
 423     decs := countDecimals(s)
 424     if decs > 0 {
 425         return decs + 1
 426     }
 427     return decs
 428 }
 429 
 430 func countWidth(s string) int {
 431     width := 0
 432 
 433     for len(s) > 0 {
 434         i, j := indexEscapeSequence(s)
 435         if i < 0 {
 436             break
 437         }
 438         if j < 0 {
 439             j = len(s)
 440         }
 441 
 442         width += utf8.RuneCountInString(s[:i])
 443         s = s[j:]
 444     }
 445 
 446     // count trailing/all runes in strings which don't end with ANSI-sequences
 447     width += utf8.RuneCountInString(s)
 448     return width
 449 }
 450 
 451 // indexEscapeSequence finds the first ANSI-style escape-sequence, which is
 452 // the multi-byte sequences starting with ESC[; the result is a pair of slice
 453 // indices which can be independently negative when either the start/end of
 454 // a sequence isn't found; given their fairly-common use, even the hyperlink
 455 // ESC]8 sequences are supported
 456 func indexEscapeSequence(s string) (int, int) {
 457     var prev byte
 458 
 459     for i := range s {
 460         b := s[i]
 461 
 462         if prev == '\x1b' && b == '[' {
 463             j := indexLetter(s[i+1:])
 464             if j < 0 {
 465                 return i, -1
 466             }
 467             return i - 1, i + 1 + j + 1
 468         }
 469 
 470         if prev == '\x1b' && b == ']' && i+1 < len(s) && s[i+1] == '8' {
 471             j := indexPair(s[i+1:], '\x1b', '\\')
 472             if j < 0 {
 473                 return i, -1
 474             }
 475             return i - 1, i + 1 + j + 2
 476         }
 477 
 478         prev = b
 479     }
 480 
 481     return -1, -1
 482 }
 483 
 484 func indexLetter(s string) int {
 485     for i, b := range s {
 486         upper := b &^ 32
 487         if 'A' <= upper && upper <= 'Z' {
 488             return i
 489         }
 490     }
 491 
 492     return -1
 493 }
 494 
 495 func indexPair(s string, x byte, y byte) int {
 496     var prev byte
 497 
 498     for i := range s {
 499         b := s[i]
 500         if prev == x && b == y && i > 0 {
 501             return i
 502         }
 503         prev = b
 504     }
 505 
 506     return -1
 507 }
 508 
 509 func realign(w *bufio.Writer, t table) {
 510     due := 0
 511     showItem := func(i int, s string, t *table) {
 512         if i > 0 {
 513             due += 2
 514         }
 515 
 516         if isNumeric(s) {
 517             dd := countDotDecimals(s)
 518             rpad := t.MaxDotDecimals[i] - dd
 519             width := countWidth(s)
 520             lpad := t.MaxWidth[i] - (width + rpad) + due
 521             writeSpaces(w, lpad)
 522             w.WriteString(s)
 523             due = rpad
 524             return
 525         }
 526 
 527         writeSpaces(w, due)
 528         w.WriteString(s)
 529         due = t.MaxWidth[i] - countWidth(s)
 530     }
 531 
 532     for _, line := range t.Rows {
 533         due = 0
 534         if len(line) > 0 {
 535             t.LoopItems(line, t.Columns, &t, showItem)
 536         }
 537         if w.WriteByte('\n') != nil {
 538             break
 539         }
 540     }
 541 }
 542 
 543 // writeSpaces does what it says, minimizing calls to write-like funcs
 544 func writeSpaces(w *bufio.Writer, n int) {
 545     const spaces = `                                `
 546     if n < 1 {
 547         return
 548     }
 549 
 550     for n >= len(spaces) {
 551         w.WriteString(spaces)
 552         n -= len(spaces)
 553     }
 554     w.WriteString(spaces[:n])
 555 }