File: hima.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
from itertools import islice
from re import compile, error as RegexError, Match, Pattern
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional
  30 
  31 
  32 info = '''
  33 hima [regexes...]
  34 
  35 HIlight MAtches colors all substrings matching any of the regexes given.
  36 '''
  37 
  38 # if len(argv) == 1:
  39 #     print(info.strip(), file=stderr)
  40 #     exit(0)
  41 
  42 # ansi_re matches ANSI-style sequences, so they're only matched `around`
  43 ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')
  44 
  45 
  46 def match(src: str, start: int, stop: int, regexes: List[Pattern]) -> Match:
  47     first = None
  48     for expr in regexes:
  49         m = expr.search(src, start, stop)
  50         if (not first) or (m and m.start() < first.start()):
  51             first = m
  52     return first
  53 
  54 
  55 def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
  56     # start is used outside the regex-match loop to handle trailing parts
  57     # in lines
  58     start = 0
  59 
  60     # replace all regex-matches on the line by surrounding each matched
  61     # substring with ANSI styles/resets
  62     while True:
  63         m = ansi_re.search(s, start)
  64         if not m:
  65             start = style_chunk(w, s, start, maxsize, regexes)
  66             break
  67 
  68         stop = m.start()
  69         start = style_chunk(w, s, start, stop, regexes)
  70         # don't forget the last part of the line, or the whole line
  71         stop = m.end()
  72         w.write(s[start:stop])
  73         start = stop
  74 
  75     # don't forget the last part of the line, or the whole line
  76     w.write(s[start:])
  77     w.write('\n')
  78 
  79 
  80 def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
  81     while True:
  82         m = match(s, start, stop, pats)
  83         if not m:
  84             return start
  85 
  86         i = m.start()
  87         j = m.end()
  88 
  89         # part before match
  90         w.write(s[start:i])
  91 
  92         # current match
  93         w.write('\x1b[7m')
  94         w.write(s[i:j])
  95         w.write('\x1b[0m')
  96 
  97         # the end of the match is the start of the `rest` of the string
  98         start = j
  99 
 100 
 101 try:
 102     regexes = [compile(s) for s in islice(argv, 1, None)]
 103     for line in stdin:
 104         # ignore trailing carriage-returns and/or line-feeds in input lines
 105         line = line.rstrip('\r\n').rstrip('\n')
 106         style_line(stdout, line, regexes, ansi_re)
 107 except BrokenPipeError:
 108     # quit quietly, instead of showing a confusing error message
 109     stderr.close()
 110 except KeyboardInterrupt:
 111     exit(2)