#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from itertools import islice
from re import compile, Match, Pattern
from sys import argv, exit, maxsize, stderr, stdin, stdout
from typing import List, Optional, TextIO


info = '''
hm [regexes...]

Hilight Matches colors all substrings matching any of the regexes given.
'''

# if len(argv) == 1:
#     print(info.strip(), file=stderr)
#     exit(0)


# ansi_re matches ANSI-style escape sequences already present in the input;
# the user's regexes are only searched in the text between such sequences,
# and the sequences themselves are copied to the output unchanged
ansi_re = compile('\x1b\\[([0-9]*[A-HJKST]|[0-9;]*m)')


def _first_nonempty(expr: Pattern, src: str, start: int,
                    stop: int) -> Optional[Match]:
    '''Return the first non-empty match of expr in src[start:stop], if any.'''

    # zero-length matches (from patterns such as `x*`, `^` or `$`) have
    # nothing to highlight, and would prevent callers from ever advancing
    # past the position where they occur
    for m in expr.finditer(src, start, stop):
        if m.end() > m.start():
            return m
    return None


def match(src: str, start: int, stop: int,
          regexes: List[Pattern]) -> Optional[Match]:
    '''
    Find the earliest non-empty match of any regex in src[start:stop].

    When several regexes match starting at the same index, the one given
    first wins. The result is None when no regex has a non-empty match in
    that range, which includes the case of an empty list of regexes.
    '''

    first = None
    for expr in regexes:
        m = _first_nonempty(expr, src, start, stop)
        if m is None:
            continue
        if first is None or m.start() < first.start():
            first = m
    return first


def style_line(w: TextIO, s: str, regexes: List[Pattern],
               ansi_re: Pattern) -> None:
    '''
    Write line s to w, followed by a line-feed, highlighting all matches.

    The line is split around the ANSI sequences matched by ansi_re: those
    are written as they are, while the text between them is searched for
    matches of the regexes given, each of which is written surrounded by
    ANSI reverse-video/reset codes.
    '''

    # start is the index of the first part of the line not yet written
    start = 0

    while True:
        m = ansi_re.search(s, start)
        if not m:
            # no more ANSI sequences: style matches on the rest of the line
            start = style_chunk(w, s, start, maxsize, regexes)
            break

        # style matches only up to where the next ANSI sequence starts
        start = style_chunk(w, s, start, m.start(), regexes)

        # emit what follows the last match styled, along with the whole
        # ANSI sequence, exactly as given
        stop = m.end()
        w.write(s[start:stop])
        start = stop

    # don't forget the last part of the line, or the whole line
    w.write(s[start:])
    w.write('\n')


def style_chunk(w: TextIO, s: str, start: int, stop: int,
                pats: List[Pattern]) -> int:
    '''
    Write the part of s[start:stop] up to the end of its last match to w,
    surrounding each match with ANSI reverse-video/reset codes.

    The result is the index right after the last match written, or the
    original start index when nothing matched: text from that index on
    isn't written, and is left for the caller to emit.
    '''

    while True:
        m = match(s, start, stop, pats)
        if not m:
            return start

        i = m.start()
        j = m.end()

        # part before match
        w.write(s[start:i])
        # current match
        w.write('\x1b[7m')
        w.write(s[i:j])
        w.write('\x1b[0m')

        # the end of the match is the start of the `rest` of the string;
        # matches are never empty, so each iteration moves forward
        start = j


def main() -> None:
    '''Style all lines from stdin, using the regexes given as arguments.'''

    try:
        regexes = [compile(s) for s in islice(argv, 1, None)]
        for line in stdin:
            # ignore trailing carriage-returns and/or line-feeds in input
            # lines
            line = line.rstrip('\r\n')
            style_line(stdout, line, regexes, ansi_re)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)


if __name__ == '__main__':
    main()