File: sbs.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # sbs [column count...] [filepaths/URIs...]
  27 #
  28 # Side-By-Side lays out lines read from standard input into several columns,
  29 # separating them with a special symbol.
  30 #
  31 # If a column-count isn't given, the script tries to find the most columns
  32 # which can fit an 80-symbols-wide limit; if that's not possible, it simply
  33 # uses 1 column.
  34 #
  35 # Besides files, sbs can also fetch HTTP/HTTPS URIs; this makes it a general
  36 # plain-text data loader, when given the number 1 as its leading argument.
  37 #
  38 # Trailing carriage-returns from input lines are ignored; all output lines end
  39 # with a line-feed.
  40 
  41 
  42 from math import ceil
  43 from re import compile
  44 from sys import argv, exit, stderr, stdin, stdout
  45 from typing import List
  46 from urllib.request import urlopen
  47 
  48 
# info is the help message, shown (on standard error) when the script's only
# argument is one of the standard cmd-line help options; surrounding empty
# lines from the triple-quoted literal are stripped away
info = '''
sbs [column count...] [filepaths/URIs...]

Side-By-Side lays out lines read from standard input into several columns,
separating them with a special symbol.

If a column-count isn't given, the script tries to find the most columns
which can fit an 80-symbols-wide limit; if that's not possible, it simply
uses 1 column.

Besides files, sbs can also fetch HTTP/HTTPS URIs; this makes it a general
plain-text data loader, when given 1 as its leading argument.

Trailing carriage-returns from input lines are ignored; all output lines end
with a line-feed.
'''.strip()
  67 
  68 
  69 # ansi_re matches ANSI-style sequences, and is used by func unstyled_width
  70 ansi_re = compile('\x1b\[([0-9]*[A-HJKST]|[0-9;]*m)')
  71 
  72 
  73 def fail(msg, code: int = 1) -> None:
  74     '''Show the error message given, and quit the app right away.'''
  75     print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
  76     exit(code)
  77 
  78 
  79 
  80 # a leading help-option arg means show the help message and quit
  81 if len(argv) == 2 and argv[1].lower() in ('-h', '--h', '-help', '--help'):
  82     print(info, file=stderr)
  83     exit(0)
  84 
  85 
  86 def can_fit(lines: List[str], numcols: int) -> bool:
  87     '''
  88     Check if lines laid out on the column-count given can fit an
  89     old-style 80-symbol-wide display.
  90     '''
  91 
  92     if len(lines) == 0 or numcols < 1:
  93         return True
  94 
  95     widths = find_col_widths(lines, numcols)
  96     sepwidth = 3 * max(len(widths) - 1, 0)
  97     return sum(widths) + sepwidth < 80
  98 
  99 
 100 def unstyled_width(s: str) -> int:
 101     '''Count symbols in strings, excluding ANSI-style codes'''
 102 
 103     c = len(s)
 104     for m in ansi_re.finditer(s):
 105         i = m.start()
 106         j = m.end()
 107         c -= j - i
 108     return c
 109 
 110 
 111 def find_output_height(lines: List[str], numcols: int) -> int:
 112     '''A formula used in several places around this script.'''
 113     return int(ceil(len(lines) / numcols))
 114 
 115 
 116 def find_col_widths(lines: List[str], numcols: int) -> List[int]:
 117     '''Figure out the max widths for all columns.'''
 118 
 119     if len(lines) == 0 or numcols < 1:
 120         return []
 121 
 122     # figure out all (max) widths for all columns
 123     widths = numcols * [0]
 124     height = find_output_height(lines, numcols)
 125     for i, start in enumerate(range(0, len(lines), height)):
 126         # don't go out of bounds on the last sub-slice
 127         end = min(start + height, len(lines) - 1)
 128 
 129         # find the max line-width in the current sub-slice ...
 130         maxw = 0
 131         for j in range(start, end):
 132             maxw = max(maxw, unstyled_width(lines[j]))
 133         # ... the sub-slice being a `virtual` column for the output
 134         widths[i] = maxw
 135     return widths
 136 
 137 
 138 def safe_index(src: List[str], i: int) -> str:
 139     '''Simplify control-flow in func sbs, by safely indexing lists'''
 140     return src[i] if i < len(src) else ''
 141 
 142 
 143 def sbs(w, lines: List[str], numcols: int) -> None:
 144     '''Lay out lines side-by-side'''
 145 
 146     if len(lines) == 0 or numcols < 1:
 147         return
 148 
 149     if numcols == 1:
 150         for s in lines:
 151             w.write(s)
 152             w.write('\n')
 153         return
 154 
 155     # use fewer columns, when there are fewer input lines
 156     if len(lines) < numcols:
 157         numcols = len(lines)
 158 
 159     height = find_output_height(lines, numcols)
 160     widths = find_col_widths(lines, numcols)
 161 
 162     # make tuple of all runs of spaces possibly needed to pad any column
 163     max_spaces = 0
 164     for l in widths:
 165         max_spaces = max(max_spaces, l)
 166     spaces = tuple(i * ' ' for i in range(max_spaces + 1))
 167 
 168     # emit output
 169     for row in range(height):
 170         for col in range(numcols):
 171             if col > 0:
 172                 # w.write(' |')
 173                 w.write('')
 174 
 175             k = col * height + row
 176             s = safe_index(lines, k)
 177             lastcol = col == numcols - 1
 178 
 179             if not lastcol:
 180                 if col > 0:
 181                     w.write(' ')
 182                 w.write(s)
 183                 # right-pad column with spaces, to align the next one
 184                 w.write(spaces[widths[col] - unstyled_width(s)])
 185             elif s != '':
 186                 # empty-string check avoided extra trailing spaces on last
 187                 # columns; original input lines can still have trailers
 188                 if col > 0:
 189                     w.write(' ')
 190                 w.write(s)
 191 
 192         w.write('\n')
 193 
 194 
 195 def seems_url(s: str) -> bool:
 196     for prot in ('https://', 'http://', 'file://', 'ftp://', 'data:'):
 197         if s.startswith(prot):
 198             return True
 199     return False
 200 
 201 
 202 def handle_lines(paths: List[str], fn, expand) -> None:
 203     '''
 204     Read all lines from all input sources needed, deferring how exactly
 205     to handle each line to the func given.
 206     '''
 207 
 208     def fix_line(line: str) -> str:
 209         return expand(line.rstrip('\r\n').rstrip('\n'))
 210 
 211     if paths.count('-') > 1:
 212         msg = 'reading from `-` (standard input) more than once not allowed'
 213         raise ValueError(msg)
 214 
 215     for path in paths:
 216         if path == '-':
 217             for line in stdin:
 218                 fn(fix_line(line))
 219             continue
 220 
 221         if seems_url(path):
 222             with urlopen(path) as inp:
 223                 for line in inp:
 224                     fn(fix_line(str(line, encoding='utf-8')))
 225             continue
 226 
 227         with open(path) as inp:
 228             for line in inp:
 229                 fn(fix_line(line))
 230 
 231     # read from stdin, when given no paths
 232     if len(paths) == 0:
 233         for line in stdin:
 234             fn(fix_line(line))
 235 
 236 
 237 def writeln(s: str) -> None:
 238     '''Func to simply output all lines, when given to func handle_lines.'''
 239     stdout.write(s)
 240     stdout.write('\n')
 241 
 242 
 243 args = argv[1:]
 244 
 245 # handle optional leading number of columns to use
 246 numcols = 0
 247 got_cols = False
 248 if len(args) > 0:
 249     try:
 250         n = int(args[0])
 251         if n > 0:
 252             numcols = n
 253         got_cols = True
 254         args = args[1:]
 255     except Exception as e:
 256         # fail(e)
 257         pass
 258 
 259 try:
 260     stdout.reconfigure(newline='\n', encoding='utf-8')
 261 
 262     if got_cols and numcols == 1:
 263         # no need to remember lines, which takes more memory and time;
 264         # this also avoids expanding tabs, keeping all lines the same,
 265         # except for trailing CRLF byte-pairs becoming line-feeds
 266         handle_lines(args, writeln, lambda s: s)
 267         exit(0)
 268 
 269     # read all lines to handle the normal case, with multiple columns
 270     lines: List[str] = []
 271     handle_lines(args, lambda s: lines.append(s), lambda s: s.expandtabs(4))
 272 
 273     if not got_cols:
 274         # find max columns which can fit an 80-symbols width; if that's
 275         # not possible for any column-count, stick to a 1 column output
 276         numcols = 1
 277 
 278         for cols in range(2, 30):
 279             if not can_fit(lines, cols):
 280                 # break
 281                 continue
 282 
 283             h = find_output_height(lines, cols)
 284             if h * cols < len(lines) + h:
 285                 # avoid completely-empty trailing columns
 286                 numcols = cols
 287 
 288     # lay out lines side-by-side
 289     sbs(stdout, lines, numcols)
 290 except BrokenPipeError:
 291     # quit quietly, instead of showing a confusing error message
 292     stderr.flush()
 293     stderr.close()
 294 except KeyboardInterrupt:
 295     # quit quietly, instead of showing a confusing error message
 296     stderr.flush()
 297     stderr.close()
 298     exit(2)
 299 except Exception as e:
 300     fail(e, 1)