File: reprose.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from io import TextIOWrapper
  27 from re import compile
  28 from sys import argv, exit, stderr, stdin, stdout
  29 
  30 
  31 info = '''
  32 reprose [options...] [max-width...] [filepaths/URIs...]
  33 
  34 Reflow plain-text prose to limit its line-width (when possible), so it's
  35 easier to read. All indentations are lost as part of this process, which
  36 also turns all runs of spaces into single spaces as well.
  37 
  38 When not given a leading integer for the max line-width, the default is
  39 to limit lines up to 80-symbols wide, when possible.
  40 '''
  41 
  42 # no args or a leading help-option arg means show the help message and quit
  43 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  44     print(info.strip(), file=stderr)
  45     exit(0)
  46 
  47 # spaces_re splits items in lines, and is used in func handle_lines
  48 spaces_re = compile(' +')
  49 
  50 
  51 def fail(msg, code: int = 1) -> None:
  52     'Show the error message given, and quit the app right away.'
  53     print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
  54     exit(code)
  55 
  56 
  57 def seems_url(s: str) -> bool:
  58     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
  59     return any(s.startswith(p) for p in protocols)
  60 
  61 
  62 def handle_lines(w, src, max_width: int = 80) -> None:
  63     cur_width = 0
  64 
  65     for line in src:
  66         line = line.rstrip('\r\n').rstrip('\n')
  67         line = line.strip()
  68 
  69         # empty input lines always result in empty output lines, possibly
  70         # also ending a previous output line
  71         if not line:
  72             w.write('\n\n' if cur_width > 0 else '\n')
  73             cur_width = 0
  74             continue
  75 
  76         for s in spaces_re.split(line):
  77             s = s.strip()
  78             n = len(s)
  79 
  80             # stay on the current output line, if the current item fits
  81             if cur_width + int(cur_width > 0) + n <= max_width:
  82                 if cur_width > 0:
  83                     w.write(' ')
  84                     cur_width += 1
  85                 w.write(s)
  86                 cur_width += n
  87                 continue
  88 
  89             if cur_width > 0:
  90                 w.write('\n')
  91                 cur_width = 0
  92 
  93             # this item is at the start of its output line, either way
  94             if n <= max_width:
  95                 w.write(s)
  96                 cur_width = n
  97             else:
  98                 w.write(s)
  99                 w.write('\n')
 100                 cur_width = 0
 101 
 102     # don't forget to end the last output line
 103     if cur_width > 0:
 104         w.write('\n')
 105 
 106 
 107 try:
 108     if argv.count('-') > 1:
 109         msg = 'reading from `-` (standard input) more than once not allowed'
 110         raise ValueError(msg)
 111 
 112     if any(seems_url(e) for e in argv):
 113         from urllib.request import urlopen
 114 
 115     maxw = 80
 116     args = argv[1:]
 117     if len(args) > 0:
 118         try:
 119             n = int(args[0])
 120             args = args[1:]
 121             if n > 0:
 122                 maxw = n
 123         except Exception:
 124             pass
 125 
 126     # handle all named inputs given
 127     for path in args:
 128         if path == '-':
 129             handle_lines(stdout, stdin, maxw)
 130             continue
 131 
 132         if seems_url(path):
 133             with urlopen(path) as inp:
 134                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 135                     handle_lines(stdout, txt, maxw)
 136             continue
 137 
 138         with open(path, encoding='utf-8') as inp:
 139             handle_lines(stdout, inp, maxw)
 140 
 141     if len(args) == 0:
 142         handle_lines(stdout, stdin, maxw)
 143 except BrokenPipeError:
 144     # quit quietly, instead of showing a confusing error message
 145     stderr.close()
 146 except KeyboardInterrupt:
 147     exit(2)
 148 except Exception as e:
 149     fail(e, 1)