File: realign.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath realign.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "errors"
  37     "io"
  38     "os"
  39     "strings"
  40     "unicode/utf8"
  41 )
  42 
// info is the help message shown by func main; the message starts with a
// line-feed, which func main skips by slicing off the first byte
const info = `
realign [options...] [filenames...]

Realign all detected columns, right-aligning any detected numbers in any
column. ANSI style-codes are also kept as given.

The only option available is to show this help message, using any of
"-h", "--h", "-help", or "--help", without the quotes.
`
  52 
  53 func main() {
  54     args := os.Args[1:]
  55 
  56     if len(args) > 0 {
  57         switch args[0] {
  58         case `-h`, `--h`, `-help`, `--help`:
  59             os.Stderr.WriteString(info[1:])
  60             return
  61 
  62         case `--`:
  63             args = args[1:]
  64         }
  65     }
  66 
  67     if err := run(args); err != nil {
  68         os.Stderr.WriteString(err.Error())
  69         os.Stderr.WriteString("\n")
  70         os.Exit(1)
  71     }
  72 }
  73 
// table has all summary info gathered from the data, along with the rows
// themselves, stored as lines/strings
type table struct {
    // Columns is the column count, which func handleReader sets using the
    // first non-empty line it finds while this count is still 0
    Columns int

    // Rows has all lines read, in order; empty lines are kept as empty
    // strings, except for any which come before the first non-empty line
    Rows []string

    // MaxWidth has the widest item-width found so far for each column,
    // as measured by func countWidth
    MaxWidth []int

    // MaxDotDecimals has, for each column, the highest count of decimal
    // digits (plus 1 for the dot) found so far among its numeric items
    MaxDotDecimals []int

    // LoopItems is the func used to split lines into items: func
    // handleReader sets it to either loopItemsTSV or loopItemsSSV,
    // depending on whether the first non-empty line has any tabs in it
    LoopItems func(s string, max int, t *table, f itemFunc)
}
  87 
// itemFunc is the type of the callbacks given to the item-looping funcs:
// i is the 0-based index of the item in its line, s is the item itself,
// and t is the table handed over to the looping func
type itemFunc func(i int, s string, t *table)
  89 
  90 func run(paths []string) error {
  91     var res table
  92 
  93     for _, p := range paths {
  94         if err := handleFile(&res, p); err != nil {
  95             return err
  96         }
  97     }
  98 
  99     if len(paths) == 0 {
 100         if err := handleReader(&res, os.Stdin); err != nil {
 101             return err
 102         }
 103     }
 104 
 105     bw := bufio.NewWriterSize(os.Stdout, 32*1024)
 106     defer bw.Flush()
 107     realign(bw, res)
 108     return nil
 109 }
 110 
 111 func handleFile(res *table, path string) error {
 112     f, err := os.Open(path)
 113     if err != nil {
 114         // on windows, file-not-found error messages may mention `CreateFile`,
 115         // even when trying to open files in read-only mode
 116         return errors.New(`can't open file named ` + path)
 117     }
 118     defer f.Close()
 119     return handleReader(res, f)
 120 }
 121 
 122 func handleReader(t *table, r io.Reader) error {
 123     const gb = 1024 * 1024 * 1024
 124     sc := bufio.NewScanner(r)
 125     sc.Buffer(nil, 8*gb)
 126 
 127     for sc.Scan() {
 128         line := sc.Text()
 129         if len(line) == 0 {
 130             if len(t.Rows) > 0 {
 131                 t.Rows = append(t.Rows, ``)
 132             }
 133             continue
 134         }
 135 
 136         t.Rows = append(t.Rows, line)
 137 
 138         if t.Columns == 0 {
 139             if t.LoopItems == nil {
 140                 if strings.IndexByte(line, '\t') >= 0 {
 141                     t.LoopItems = loopItemsTSV
 142                 } else {
 143                     t.LoopItems = loopItemsSSV
 144                 }
 145             }
 146 
 147             const maxInt = int(^uint(0) >> 1)
 148             t.LoopItems(line, maxInt, t, updateColumnCount)
 149         }
 150 
 151         t.LoopItems(line, t.Columns, t, updateItem)
 152     }
 153 
 154     return sc.Err()
 155 }
 156 
// updateColumnCount is an itemFunc which sets the table's column count to
// the number of items seen so far in the current line: after looping over
// a whole line with it, the count matches the number of items in that line
func updateColumnCount(i int, s string, t *table) {
    t.Columns = i + 1
}
 160 
 161 func updateItem(i int, s string, t *table) {
 162     // ensure column-info-slices have enough room
 163     if i >= len(t.MaxWidth) {
 164         t.MaxWidth = append(t.MaxWidth, 0)
 165         t.MaxDotDecimals = append(t.MaxDotDecimals, 0)
 166     }
 167 
 168     // keep track of widest rune-counts for each column
 169     w := countWidth(s)
 170     if t.MaxWidth[i] < w {
 171         t.MaxWidth[i] = w
 172     }
 173 
 174     // update stats for numeric items
 175     if isNumeric(s) {
 176         dd := countDotDecimals(s)
 177         if t.MaxDotDecimals[i] < dd {
 178             t.MaxDotDecimals[i] = dd
 179         }
 180     }
 181 }
 182 
 183 // loopItemsSSV loops over a line's items, allocation-free style; when given
 184 // empty strings, the callback func is never called
 185 func loopItemsSSV(s string, max int, t *table, f itemFunc) {
 186     s = trimTrailingSpaces(s)
 187 
 188     for i := 0; true; i++ {
 189         s = trimLeadingSpaces(s)
 190         if len(s) == 0 {
 191             return
 192         }
 193 
 194         if i+1 == max {
 195             f(i, s, t)
 196             return
 197         }
 198 
 199         j := strings.IndexByte(s, ' ')
 200         if j < 0 {
 201             f(i, s, t)
 202             return
 203         }
 204 
 205         f(i, s[:j], t)
 206         s = s[j+1:]
 207     }
 208 }
 209 
 210 func trimLeadingSpaces(s string) string {
 211     for len(s) > 0 && s[0] == ' ' {
 212         s = s[1:]
 213     }
 214     return s
 215 }
 216 
 217 func trimTrailingSpaces(s string) string {
 218     for len(s) > 0 && s[len(s)-1] == ' ' {
 219         s = s[:len(s)-1]
 220     }
 221     return s
 222 }
 223 
 224 // loopItemsTSV loops over a line's tab-separated items, allocation-free style;
 225 // when given empty strings, the callback func is never called
 226 func loopItemsTSV(s string, max int, t *table, f itemFunc) {
 227     if len(s) == 0 {
 228         return
 229     }
 230 
 231     for i := 0; true; i++ {
 232         if i+1 == max {
 233             f(i, s, t)
 234             return
 235         }
 236 
 237         j := strings.IndexByte(s, '\t')
 238         if j < 0 {
 239             f(i, s, t)
 240             return
 241         }
 242 
 243         f(i, s[:j], t)
 244         s = s[j+1:]
 245     }
 246 }
 247 
 248 func skipLeadingEscapeSequences(s string) string {
 249     for len(s) >= 2 {
 250         if s[0] != '\x1b' {
 251             return s
 252         }
 253 
 254         switch s[1] {
 255         case '[':
 256             s = skipSingleLeadingANSI(s[2:])
 257 
 258         case ']':
 259             if len(s) < 3 || s[2] != '8' {
 260                 return s
 261             }
 262             s = skipSingleLeadingOSC(s[3:])
 263 
 264         default:
 265             return s
 266         }
 267     }
 268 
 269     return s
 270 }
 271 
 272 func skipSingleLeadingANSI(s string) string {
 273     for len(s) > 0 {
 274         upper := s[0] &^ 32
 275         s = s[1:]
 276         if 'A' <= upper && upper <= 'Z' {
 277             break
 278         }
 279     }
 280 
 281     return s
 282 }
 283 
 284 func skipSingleLeadingOSC(s string) string {
 285     var prev byte
 286 
 287     for len(s) > 0 {
 288         b := s[0]
 289         s = s[1:]
 290         if prev == '\x1b' && b == '\\' {
 291             break
 292         }
 293         prev = b
 294     }
 295 
 296     return s
 297 }
 298 
 299 // isNumeric checks if a string is valid/useable as a number
 300 func isNumeric(s string) bool {
 301     if len(s) == 0 {
 302         return false
 303     }
 304 
 305     s = skipLeadingEscapeSequences(s)
 306     if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
 307         s = s[1:]
 308     }
 309 
 310     s = skipLeadingEscapeSequences(s)
 311     if len(s) == 0 {
 312         return false
 313     }
 314     if s[0] == '.' {
 315         return isDigits(s[1:])
 316     }
 317 
 318     digits := 0
 319 
 320     for {
 321         s = skipLeadingEscapeSequences(s)
 322         if len(s) == 0 {
 323             break
 324         }
 325 
 326         if s[0] == '.' {
 327             return isDigits(s[1:])
 328         }
 329 
 330         if !('0' <= s[0] && s[0] <= '9') {
 331             return false
 332         }
 333 
 334         digits++
 335         s = s[1:]
 336     }
 337 
 338     s = skipLeadingEscapeSequences(s)
 339     return len(s) == 0 && digits > 0
 340 }
 341 
 342 func isDigits(s string) bool {
 343     if len(s) == 0 {
 344         return false
 345     }
 346 
 347     digits := 0
 348 
 349     for {
 350         s = skipLeadingEscapeSequences(s)
 351         if len(s) == 0 {
 352             break
 353         }
 354 
 355         if '0' <= s[0] && s[0] <= '9' {
 356             s = s[1:]
 357             digits++
 358         } else {
 359             return false
 360         }
 361     }
 362 
 363     s = skipLeadingEscapeSequences(s)
 364     return len(s) == 0 && digits > 0
 365 }
 366 
 367 // countDecimals counts decimal digits from the string given, assuming it
 368 // represents a valid/useable float64, when parsed
 369 func countDecimals(s string) int {
 370     dot := strings.IndexByte(s, '.')
 371     if dot < 0 {
 372         return 0
 373     }
 374 
 375     decs := 0
 376     s = s[dot+1:]
 377 
 378     for len(s) > 0 {
 379         s = skipLeadingEscapeSequences(s)
 380         if len(s) == 0 {
 381             break
 382         }
 383         if '0' <= s[0] && s[0] <= '9' {
 384             decs++
 385         }
 386         s = s[1:]
 387     }
 388 
 389     return decs
 390 }
 391 
 392 // countDotDecimals is like func countDecimals, but this one also includes
 393 // the dot, when any decimals are present, else the count stays at 0
 394 func countDotDecimals(s string) int {
 395     decs := countDecimals(s)
 396     if decs > 0 {
 397         return decs + 1
 398     }
 399     return decs
 400 }
 401 
 402 func countWidth(s string) int {
 403     width := 0
 404 
 405     for len(s) > 0 {
 406         i := indexStartANSI(s)
 407         if i < 0 {
 408             width += utf8.RuneCountInString(s)
 409             return width
 410         }
 411 
 412         width += utf8.RuneCountInString(s[:i])
 413 
 414         for len(s) > 0 {
 415             upper := s[0] &^ 32
 416             s = s[1:]
 417             if 'A' <= upper && upper <= 'Z' {
 418                 break
 419             }
 420         }
 421     }
 422 
 423     return width
 424 }
 425 
 426 func indexStartANSI(s string) int {
 427     var prev byte
 428 
 429     for i := range s {
 430         b := s[i]
 431         if prev == '\x1b' && b == '[' {
 432             return i - 1
 433         }
 434         prev = b
 435     }
 436 
 437     return -1
 438 }
 439 
 440 func realign(w *bufio.Writer, t table) {
 441     due := 0
 442     showItem := func(i int, s string, t *table) {
 443         if i > 0 {
 444             due += 2
 445         }
 446 
 447         if isNumeric(s) {
 448             dd := countDotDecimals(s)
 449             rpad := t.MaxDotDecimals[i] - dd
 450             width := countWidth(s)
 451             lpad := t.MaxWidth[i] - (width + rpad) + due
 452             writeSpaces(w, lpad)
 453             w.WriteString(s)
 454             due = rpad
 455             return
 456         }
 457 
 458         writeSpaces(w, due)
 459         w.WriteString(s)
 460         due = t.MaxWidth[i] - countWidth(s)
 461     }
 462 
 463     for _, line := range t.Rows {
 464         due = 0
 465         if len(line) > 0 {
 466             t.LoopItems(line, t.Columns, &t, showItem)
 467         }
 468         if w.WriteByte('\n') != nil {
 469             break
 470         }
 471     }
 472 }
 473 
 474 // writeSpaces does what it says, minimizing calls to write-like funcs
 475 func writeSpaces(w *bufio.Writer, n int) {
 476     const spaces = `                                `
 477     if n < 1 {
 478         return
 479     }
 480 
 481     for n >= len(spaces) {
 482         w.WriteString(spaces)
 483         n -= len(spaces)
 484     }
 485     w.WriteString(spaces[:n])
 486 }