File: reprose.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from re import compile
  27 from sys import argv, exit, stderr, stdin, stdout
  28 
  29 
  30 info = '''
  31 reprose [options...] [max-width...] [filepaths/URIs...]
  32 
  33 Reflow plain-text prose to limit its line-width (when possible), so it's
  34 easier to read. All indentations are lost as part of this process, which
  35 also turns all runs of spaces into single spaces as well.
  36 
  37 When not given a leading integer for the max line-width, the default is
  38 to limit lines up to 80-symbols wide, when possible.
  39 '''
  40 
  41 # no args or a leading help-option arg means show the help message and quit
  42 if len(argv) > 1 and argv[1] in ('-h', '--h', '-help', '--help'):
  43     print(info.strip())
  44     exit(0)
  45 
  46 # spaces_re splits items in lines, and is used in func handle_lines
  47 spaces_re = compile(' +')
  48 
  49 
  50 def fail(msg, code: int = 1) -> None:
  51     'Show the error message given, and quit the app right away.'
  52     print(f'\x1b[31m{msg}\x1b[0m', file=stderr)
  53     exit(code)
  54 
  55 
  56 def seems_url(s: str) -> bool:
  57     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
  58     return any(s.startswith(p) for p in protocols)
  59 
  60 
  61 def handle_lines(w, src, max_width: int = 80) -> None:
  62     cur_width = 0
  63 
  64     for line in src:
  65         line = line.rstrip('\r\n').rstrip('\n')
  66         line = line.strip()
  67 
  68         # empty input lines always result in empty output lines, possibly
  69         # also ending a previous output line
  70         if not line:
  71             w.write('\n\n' if cur_width > 0 else '\n')
  72             cur_width = 0
  73             continue
  74 
  75         for s in spaces_re.split(line):
  76             s = s.strip()
  77             n = len(s)
  78 
  79             # stay on the current output line, if the current item fits
  80             if cur_width + int(cur_width > 0) + n <= max_width:
  81                 if cur_width > 0:
  82                     w.write(' ')
  83                     cur_width += 1
  84                 w.write(s)
  85                 cur_width += n
  86                 continue
  87 
  88             if cur_width > 0:
  89                 w.write('\n')
  90                 cur_width = 0
  91 
  92             # this item is at the start of its output line, either way
  93             if n <= max_width:
  94                 w.write(s)
  95                 cur_width = n
  96             else:
  97                 w.write(s)
  98                 w.write('\n')
  99                 cur_width = 0
 100 
 101     # don't forget to end the last output line
 102     if cur_width > 0:
 103         w.write('\n')
 104 
 105 
 106 try:
 107     if argv.count('-') > 1:
 108         msg = 'reading from `-` (standard input) more than once not allowed'
 109         raise ValueError(msg)
 110 
 111     if any(seems_url(e) for e in argv):
 112         from io import TextIOWrapper
 113         from urllib.request import urlopen
 114 
 115     maxw = 80
 116     args = argv[1:]
 117     if len(args) > 0:
 118         try:
 119             n = int(args[0])
 120             args = args[1:]
 121             if n > 0:
 122                 maxw = n
 123         except Exception:
 124             pass
 125 
 126     # handle all named inputs given
 127     for path in args:
 128         if path == '-':
 129             handle_lines(stdout, stdin, maxw)
 130             continue
 131 
 132         if seems_url(path):
 133             with urlopen(path) as inp:
 134                 with TextIOWrapper(inp, encoding='utf-8') as txt:
 135                     handle_lines(stdout, txt, maxw)
 136             continue
 137 
 138         with open(path, encoding='utf-8') as inp:
 139             handle_lines(stdout, inp, maxw)
 140 
 141     if len(args) == 0:
 142         handle_lines(stdout, stdin, maxw)
 143 except BrokenPipeError:
 144     # quit quietly, instead of showing a confusing error message
 145     stderr.close()
 146 except KeyboardInterrupt:
 147     exit(2)
 148 except Exception as e:
 149     fail(e, 1)