#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from os import fstat
from sys import argv, exit, stderr, stdin, stdout


info = '''
nh [options...] [filepaths/URIs...]

Nice Hexadecimals is a byte-viewer which shows bytes as base-16 values,
using various ANSI styles to color-code output.

Output lines end with a panel showing all ASCII sequences detected along:
each such panel also includes all ASCII from the next row as well, since
not doing that would make grepping/matching whole strings less reliable,
as some matches may be missed simply due to the narrowness of the panel.

Options, where leading double-dashes are also allowed:

    -h          show this help message
    -help       same as -h

    -n          narrow output, which fits 80-column mode
    -narrow     same as -n
'''


def _style_byte(b: int) -> str:
    'Render the ANSI-styled, 3-column-wide cell for the byte value given.'

    # the 2 extreme byte values each get a color of their own
    if b == 0x00:
        return '\x1b[38;5;111m00 '
    if b == 0xff:
        return '\x1b[38;5;209mff '

    # printable ASCII (spaces included) shows the hex pair followed by the
    # symbol itself, each in its own color, instead of a trailing space
    if 32 <= b < 127:
        return f'\x1b[38;5;72m{b:02x}\x1b[38;5;239m{chr(b)}'

    # all other bytes share the same style, and end with a plain space
    return f'\x1b[38;5;246m{b:02x} '


# bytes2styled_hex has `pre-rendered` strings for each possible byte; it's
# built once, so the per-byte cost while rendering stays a tuple lookup
bytes2styled_hex = tuple(_style_byte(i) for i in range(256))

# int2ascii slightly speeds up func show_ascii
int2ascii = tuple(chr(i) if 32 <= i < 127 else ' ' for i in range(256))

# visible noticeably speeds up func show_ascii; notice how spaces (code 32)
# aren't considered visible symbols, which makes sense in func show_ascii
visible = tuple(32 < i < 127 for i in range(256))

# spaces lets func write_spaces minimize `write` operations, resulting in
# noticeable speed-ups when the script deals with megabytes of data: entry
# i is a string of exactly i spaces; this default fits 16 bytes per line,
# and func main rebuilds it to match the width actually chosen
spaces = tuple(i * ' ' for i in range(3 * 16 + 4))


def show_hex(w, src, chunk_size: int = 16) -> None:
    '''
    Handle all input from the source given, emitting styled output.

    Parameter w is the output: anything with a `write` method taking
    strings. Parameter src is the input: anything with a `read` method
    which takes a byte-count and returns bytes, empty when data are over.
    Parameter chunk_size is how many bytes to show on each output line.
    '''

    # make the ruler/line-breather, which shows up every 5 hex-output lines
    pre = 8 * ' '
    pat = ' ·'
    pat = int(3 * chunk_size / len(pat)) * pat
    sep_line = f'{pre} \x1b[38;5;245m{pat}\x1b[0m\n'

    # n is the current byte offset shown at the start of each display line
    n = 0

    # lines keeps track of the main output line/row count, to figure out
    # when to put `breather` lines
    lines = 0

    # prev remembers the previous chunk, as showing ASCII content for
    # 2 output-lines worth of bytes requires staying 1 step behind, so
    # to speak
    prev = src.read(chunk_size)

    while len(prev) > 0:
        # chunk is empty when prev turns out to be the last output line:
        # that line is still shown, and then the loop ends with no more
        # reads, since prev becomes empty
        chunk = src.read(chunk_size)

        if lines % 5 == 0 and lines > 0:
            w.write(sep_line)
        show_line(w, n, prev, chunk, chunk_size)

        n += len(prev)
        prev = chunk
        lines += 1


def show_line(w, n: int, prev, chunk, chunk_size: int) -> None:
    '''
    Help func show_hex do its job, simplifying its control flow.

    Parameter n is the byte offset shown at the start of the line, prev
    has the bytes to show as hex, and chunk has the bytes from the next
    line, which are only used to extend the ASCII panel.
    '''

    show_restyled_uint(w, n, 8)
    w.write(' \x1b[48;5;254m')
    # all hex cells for the line are emitted with a single `write` call
    w.write(''.join(bytes2styled_hex[e] for e in prev))
    w.write('\x1b[0m')
    # lines with fewer bytes than usual need wider padding, so all ASCII
    # panels start at the same column
    show_ascii(w, prev, chunk, 3 * (chunk_size - len(prev)) + 2)
    w.write('\n')


def show_restyled_uint(w, n: int, width: int) -> None:
    '''
    Alternate styles on 3-item chunks of digits from the integer given.

    The digits are right-aligned by left-padding them with spaces up to
    the width given: numbers with more digits than that aren't padded.
    '''

    digits = str(n)
    count = len(digits)

    # left-pad digits with spaces to fill the output-width given
    write_spaces(w, width - count)

    # it's quicker to just emit short-enough digit-runs verbatim
    if count < 4:
        w.write(digits)
        return

    # emit leading chunk of digits, which is the only one which
    # can have fewer than 3 items
    lead = count % 3
    w.write(digits[:lead])

    # start by styling the next digit-group only if there was a
    # non-empty leading group at the start of the full digit-run
    use_style = lead > 0

    # alternate styles until the string is over: the digits left
    # are always a multiple of 3
    for start in range(lead, count, 3):
        group = digits[start:start + 3]
        if use_style:
            w.write(f'\x1b[38;5;248m{group}\x1b[0m')
        else:
            w.write(group)
        use_style = not use_style


def show_ascii(w, first, second: bytes, pre: int) -> None:
    '''
    Emit the ASCII side-panel for func show_hex.

    Runs of visible ASCII symbols from the first line, followed by those
    from the second one, are emitted with single spaces between them;
    parameter pre is how many spaces to emit before the first such run.
    Nothing at all is emitted when no visible symbols are found.
    '''

    # prev_vis keeps track of the previous byte's `visibility`, so spaces
    # are added when bytes change from non-visible-ASCII to visible-ASCII
    prev_vis = False
    gap = pre

    # both `lines` in the pair are handled the same way, and runs can
    # keep going across them, since prev_vis isn't reset in between
    for part in (first, second):
        for e in part:
            is_vis = visible[e]
            if is_vis:
                if not prev_vis:
                    write_spaces(w, gap)
                    # only the first run is preceded by the wide padding
                    gap = 1
                w.write(int2ascii[e])
            prev_vis = is_vis


def write_spaces(w, n: int) -> None:
    '''
    Emit the number of spaces given, minimizing `write` calls.

    Counts less than 1 emit nothing; counts wider than the widest string
    in the `spaces` table are emitted using multiple `write` calls.
    '''

    if n < 1:
        return

    # the last table entry is the widest string, which is len(spaces) - 1
    # spaces wide: counting down by exactly that much per `write` is what
    # keeps the total right for counts the table doesn't cover directly
    widest = len(spaces) - 1
    while n > widest:
        w.write(spaces[widest])
        n -= widest
    w.write(spaces[n])


def seems_url(s: str) -> bool:
    'Check if the string given starts with any of the URI schemes supported.'
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


def main() -> None:
    '''
    Run the command-line app: handle leading options, then show all inputs.

    The process exits with code 0 after showing the help message, code 1
    after showing an error message, and code 2 on keyboard interrupts.
    '''

    global spaces

    # args is the `proper` list of arguments given to the script
    args = argv[1:]

    # a leading help-option arg means show the help message and quit
    if len(args) > 0 and args[0] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    # narrow-output is to fit results in 80-column mode
    bytes_per_line = 16
    if len(args) > 0 and args[0] in ('-n', '--n', '-narrow', '--narrow'):
        bytes_per_line = 12
        args = args[1:]
    elif len(args) > 0:
        # allow a leading integer argument to set exactly how many bytes per
        # line to show in the styled output, before the ASCII-panel contents
        try:
            # ignore option-style leading dashes when parsing the integer
            n = abs(int(args[0].lstrip('-')))
        except ValueError:
            # leading arg isn't a valid integer, so it's just an input name
            pass
        else:
            if n > 0:
                # only change the width-setting if leading number isn't zero
                bytes_per_line = n
            # don't treat a leading integer as a filepath, no matter what
            args = args[1:]

    # match the spaces table to the line-width chosen; the table is capped
    # because its memory use grows with the square of its length, and func
    # write_spaces can handle counts wider than the table anyway
    spaces = tuple(i * ' ' for i in range(min(3 * bytes_per_line + 4, 1024)))

    try:
        if args.count('-') > 1:
            msg = 'reading from `-` (standard input) more than once not allowed'
            raise ValueError(msg)

        # only load the URL-related code when it's actually needed
        if any(seems_url(e) for e in args):
            from urllib.request import urlopen

        for i, path in enumerate(args):
            if i > 0:
                stdout.write('\n')
                stdout.write('\n')

            if path == '-':
                # NOTE(review): the empty `()` looks like it lost some text,
                # perhaps a stdin placeholder; confirm with the author
                stdout.write('• - ()\n')
                stdout.write('\n')
                show_hex(stdout, stdin.buffer, bytes_per_line)
                continue

            if seems_url(path):
                with urlopen(path) as inp:
                    stdout.write(f'• {path}\n')
                    stdout.write('\n')
                    show_hex(stdout, inp, bytes_per_line)
                continue

            with open(path, mode='rb', buffering=4_960) as inp:
                n = fstat(inp.fileno()).st_size
                stdout.write(f'• {path} \x1b[38;5;245m({n:,} bytes)\x1b[0m\n')
                stdout.write('\n')
                show_hex(stdout, inp, bytes_per_line)

        # no inputs given means use standard input
        if len(args) == 0:
            # NOTE(review): this title may have lost a stdin placeholder
            # too; confirm with the author
            stdout.write('• \n')
            stdout.write('\n')
            show_hex(stdout, stdin.buffer, bytes_per_line)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()