#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from itertools import islice
from re import compile, Match, Pattern
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional


info = '''
hima [regexes...]

HIlight MAtches colors all substrings matching any of the regexes given.
'''

# if len(argv) == 1:
#     print(info.strip(), file=stderr)
#     exit(0)

# ansi_re matches ANSI-style sequences, so the user-given regexes are only
# ever matched `around` those sequences, never inside them
ansi_re = compile('\x1b\\[[0-9;]*[A-Za-z]')


def match(src: str, start: int, stop: int,
          regexes: List[Pattern]) -> Optional[Match]:
    '''
    Find the earliest non-empty match of any of the regexes given.

    Only the part of src between indices start and stop is searched. When
    several regexes match starting at the same index, the one given first
    wins. The result is None when no regex has a non-empty match there.

    Zero-width matches (such as the ones from `a*`, `^`, or `\\b`) are
    skipped on purpose: they have nothing to highlight, and returning them
    would prevent callers which resume from the end of each match from ever
    advancing.
    '''

    first = None
    for expr in regexes:
        # finditer keeps going after empty matches, so the first non-empty
        # item it gives is the earliest match worth highlighting
        m = next(
            (m for m in expr.finditer(src, start, stop) if m.end() > m.start()),
            None,
        )
        if m is None:
            continue
        if first is None or m.start() < first.start():
            first = m
    return first


def style_line(w, s: str, regexes: List[Pattern], ansi_re: Pattern) -> None:
    '''
    Write line s to w, highlighting all regex matches, then a line-feed.

    Any substrings matching ansi_re are written as they are, and the regexes
    are only matched against the text between such substrings.
    '''

    # start is used outside the loop to handle trailing parts in lines
    start = 0

    while True:
        m = ansi_re.search(s, start)
        if not m:
            # no ANSI sequences left: style what remains of the line
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # style the text leading up to the ANSI sequence
        stop = m.start()
        start = style_chunk(w, s, start, stop, regexes)
        # emit what's left of that text, along with the sequence itself
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w, s: str, start: int, stop: int, pats: List[Pattern]) -> int:
    '''
    Write the part of s between indices start and stop to w, up to the end
    of the last match in it, surrounding each match with ANSI codes to
    reverse-style it and to reset the style.

    The result is the index right after the last match written, or the
    original start index when nothing matched: whatever follows that index
    is left for the caller to write.
    '''

    while True:
        m = match(s, start, stop, pats)
        if not m:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])

        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string
        start = j


def main() -> None:
    '''
    Highlight matches of the regexes given as command-line arguments on all
    lines read from standard input, writing results to standard output.
    '''

    try:
        regexes = [compile(s) for s in islice(argv, 1, None)]
        for line in stdin:
            # ignore trailing carriage-returns and/or line-feeds in input lines
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)


if __name__ == '__main__':
    main()