File: sbs.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from math import ceil
  27 from re import compile
  28 from sys import argv, exit, stderr, stdin, stdout
  29 from typing import Iterable, List
  30 
  31 
  32 col_sep = ''
  33 tab_stop = 4
  34 max_auto_width = 80
  35 
  36 info = '''
  37 sbs [column count...] [filepaths/URIs...]
  38 
  39 
  40 Side-By-Side lays out lines read from all inputs given into several columns,
  41 separating them with a special symbol. If no named inputs are given, lines
  42 are read from the standard input instead; names can refer to files, but can
  43 also be HTTP/HTTPS URIs.
  44 
  45 If a column-count isn't given, the script tries to find the most columns
  46 which can fit a reasonable width-limit; when even a single column can't fit
  47 that limit, it simply emits all lines, which is the same as using 1 column.
  48 
  49 Trailing carriage-returns from input lines are ignored; all output lines end
  50 with a line-feed.
  51 '''
  52 
  53 # a leading help-option arg means show the help message and quit
  54 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  55     print(info.strip(), file=stderr)
  56     exit(0)
  57 
  58 
  59 # ansi_re matches ANSI-style sequences, and is used by func unstyled_width
  60 ansi_re = compile('\x1b\\[([0-9]*[A-HJKST]|[0-9;]*m)')
  61 
  62 
  63 def can_fit(lines: List[str], ncols: int, sep: str, width: int) -> bool:
  64     '''
  65     Check if lines laid out on the column-count given can fit the
  66     horizontal width given.
  67     '''
  68 
  69     if len(lines) == 0 or ncols < 1:
  70         return True
  71 
  72     total = 0
  73     colcount = 0
  74     sepwidth = (len(sep) + 2)
  75 
  76     for w in col_widths(lines, ncols):
  77         total += w
  78         colcount += 1
  79         if total + sepwidth * max(colcount - 1, 0) > width:
  80             return False
  81 
  82     return total + sepwidth * max(colcount - 1, 0) <= width
  83 
  84 
  85 def unstyled_width(s: str) -> int:
  86     'Count symbols in strings, excluding ANSI-styles.'
  87     extra = sum(m.end() - m.start() for m in ansi_re.finditer(s))
  88     return len(s) - extra
  89 
  90 
  91 def output_height(lines: List[str], numcols: int) -> int:
  92     return int(ceil(len(lines) / numcols))
  93 
  94 
  95 def col_widths(lines: List[str], numcols: int) -> Iterable:
  96     'Find the max widths for all columns.'
  97 
  98     if len(lines) == 0 or numcols < 1:
  99         return tuple()
 100 
 101     height = output_height(lines, numcols)
 102 
 103     for start in range(0, len(lines), height):
 104         # don't go out of bounds on the last sub-slice
 105         end = min(start + height, len(lines))
 106         if start == end:
 107             continue
 108 
 109         # find the max line-width in the current sub-slice, which
 110         # acts as a `virtual` column for the output
 111         yield max(unstyled_width(lines[j]) for j in range(start, end))
 112 
 113 
 114 def sbs(w, lines: List[str], numcols: int, colsep: str) -> None:
 115     if len(lines) == 0 or numcols < 1:
 116         return
 117 
 118     if numcols < 2:
 119         for s in lines:
 120             w.write(s)
 121             w.write('\n')
 122         return
 123 
 124     # use fewer columns, when there are too few input lines
 125     numcols = min(numcols, len(lines))
 126 
 127     height = output_height(lines, numcols)
 128     widths = tuple(col_widths(lines, numcols))
 129     numcols = min(numcols, len(widths))
 130 
 131     # make tuple of all runs of spaces up to the most needed to pad columns
 132     max_spaces = max(widths) if len(widths) > 0 else 0
 133     spaces = tuple(i * ' ' for i in range(max_spaces + 1))
 134 
 135     colsep = f' {colsep}'
 136 
 137     for row in range(height):
 138         for col in range(numcols):
 139             w.write(colsep if col > 0 else '')
 140             k = col * height + row
 141             s = lines[k] if k < len(lines) else ''
 142 
 143             if col < numcols - 1:
 144                 w.write(' ' if col > 0 else '')
 145                 w.write(s)
 146                 # right-pad column with spaces, to align the next one
 147                 w.write(spaces[widths[col] - unstyled_width(s)])
 148             elif s:
 149                 # empty-string check avoided extra trailing spaces on last
 150                 # columns; original input lines can still have trailers
 151                 w.write(' ' if col > 0 else '')
 152                 w.write(s)
 153 
 154         w.write('\n')
 155 
 156 
 157 def find_fit(lines: List[str], col_sep: str, max_width: int) -> int:
 158     '''
 159     Find max columns which can fit a predetermined n-symbols width;
 160     if that's not possible for any column-count, stick to 1 column.
 161     '''
 162 
 163     if len(lines) == 0 or max(unstyled_width(l) for l in lines) > max_width:
 164         return 1
 165 
 166     max_possible_cols = int(max_width / (len(col_sep) + 2))
 167     most = min(max_possible_cols, len(lines))
 168 
 169     # it's sometimes possible to fit more columns after the lowest
 170     # number of columns which fails the test, so loop backward
 171     for cols in range(most, 2, -1):
 172         if can_fit(lines, cols, col_sep, max_width):
 173             return cols
 174     return 1
 175 
 176 
 177 def seems_url(s: str) -> bool:
 178     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 179     return any(s.startswith(p) for p in protocols)
 180 
 181 
 182 def handle_lines(paths: List[str], handle, expand) -> None:
 183     '''
 184     Read all lines from all input sources needed, deferring how exactly
 185     to handle each line to the funcs given.
 186     '''
 187 
 188     def fix_line(line: str) -> str:
 189         return expand(line.rstrip('\r\n').rstrip('\n'))
 190 
 191     if paths.count('-') > 1:
 192         msg = 'reading from `-` (standard input) more than once not allowed'
 193         raise ValueError(msg)
 194 
 195     if any(seems_url(e) for e in paths):
 196         from urllib.request import urlopen
 197 
 198     for path in paths:
 199         if path == '-':
 200             for line in stdin:
 201                 handle(fix_line(line))
 202             continue
 203 
 204         if seems_url(path):
 205             with urlopen(path) as inp:
 206                 for line in inp:
 207                     handle(fix_line(str(line, encoding='utf-8')))
 208             continue
 209 
 210         with open(path, encoding='utf-8') as inp:
 211             for line in inp:
 212                 handle(fix_line(line))
 213 
 214     # read from stdin, when given no paths
 215     if len(paths) == 0:
 216         for line in stdin:
 217             handle(fix_line(line))
 218 
 219 
 220 args = argv[1:]
 221 
 222 # handle optional leading number of columns to use
 223 num_cols = 0
 224 got_cols = False
 225 try:
 226     n = int(args[0])
 227     if n > 0:
 228         num_cols = n
 229     got_cols = True
 230     args = args[1:]
 231 except Exception:
 232     pass
 233 
 234 try:
 235     if got_cols and num_cols == 1:
 236         # no need to remember lines, which takes more memory and time
 237         def writeln(s: str) -> None:
 238             stdout.write(s)
 239             stdout.write('\n')
 240         # handle_lines(args, writeln, lambda s: s)
 241         handle_lines(args, writeln, lambda s: s.expandtabs(tab_stop))
 242         exit(0)
 243 
 244     # read all lines to handle the normal case, with multiple columns
 245     lines = []
 246     ts = tab_stop
 247     handle_lines(args, lambda s: lines.append(s), lambda s: s.expandtabs(ts))
 248 
 249     if not got_cols:
 250         num_cols = find_fit(lines, col_sep, max_auto_width)
 251     sbs(stdout, lines, num_cols, col_sep)
 252 except BrokenPipeError:
 253     # quit quietly, instead of showing a confusing error message
 254     stderr.close()
 255 except KeyboardInterrupt:
 256     exit(2)
 257 except Exception as e:
 258     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 259     exit(1)