#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from io import TextIOWrapper
from json import dumps
from math import isinf, isnan
from sys import argv, exit, stderr, stdin, stdout
from typing import Any, Callable, Iterable


info = '''
detsv [options...] [filepath/URI...]

DE-TSV turns a TSV-format (Tab-Separated Values) table into other common
data formats, like JSON (JavaScript Object Notation), or `JSONS` (JSON
Strings).

The output is JSONS by default, since keeping the original input-data as
strings can't accidentally mangle values.

All (optional) leading options start with either single or double-dash,
and pick the output format. Some of the options are, shown in their
single-dash form:

    -h          show this help message
    -help       show this help message

    -j          convert into JSON, parsing numbers, and using null for empty
    -json       convert into JSON, parsing numbers, and using null for empty

    -jsons      convert into JSON Strings, where values are strings or null
    -json-s     convert into JSON Strings, where values are strings or null
'''


def narrow_value(s: str) -> Any:
    """Turn a TSV field into the closest JSON-friendly value.

    Empty strings become None (JSON null); strings which parse as finite
    floating-point numbers become floats; anything else is returned as is.
    """

    # empty strings become null values
    if s == '':
        return None

    # try parsing valid floating-point values
    try:
        f = float(s)
    except ValueError:
        # can't narrow/parse the string, so keep it as is
        return s

    # NaN and the infinities aren't valid JSON numbers, so those stay strings
    if isnan(f) or isinf(f):
        return s
    return f


def _split_fields(line: str) -> list:
    """Split a TSV line into its fields, ignoring its trailing line-ending."""
    return line.rstrip('\r\n').split('\t')


def to_json(
    w, src: Iterable[str], narrow: Callable[[str], Any] = narrow_value,
) -> None:
    """Write the TSV lines from src to w as a JSON array of objects.

    The first line is the header, whose fields are the keys of all objects
    emitted; each later line becomes an object, one per output line. Fields
    missing at the end of a row are null. Each value is the result of calling
    narrow on its field. Input without data rows results in an empty array.

    src can be any iterable of lines, such as an open text file or a list.

    Raises ValueError when a row has more fields than the header has.
    """

    # lists and other iterables aren't iterators, so `next` can't use them
    lines = iter(src)

    try:
        header = _split_fields(next(lines))
    except StopIteration:
        # not even a header line
        w.write('[]\n')
        return

    n = 0
    for line in lines:
        row = _split_fields(line)
        if len(row) > len(header):
            # data lines start right after the header line, hence the +2
            msg = f'line {n + 2} has {len(row)} fields, ' + \
                f'but the header only has {len(header)}'
            raise ValueError(msg)

        w.write(',\n ' if n > 0 else '[\n ')

        # start from an all-null row, so missing trailing fields stay null
        kv = dict.fromkeys(header)
        # update values up to what the current row gives
        for k, s in zip(header, row):
            kv[k] = narrow(s)

        w.write(dumps(kv))
        n += 1

    # don't forget to close top-level array
    w.write('[]\n' if n == 0 else '\n]\n')


def to_jsons(w, src: Iterable[str]) -> None:
    """Write TSV lines as a JSON array of objects, keeping fields as strings.

    This works like to_json, except fields are never parsed: only fields
    missing at the end of a row become null.
    """
    return to_json(w, src, lambda s: s)


def to_tsv(w, src: Iterable[str]) -> None:
    """Copy TSV lines from src to w, ending each line with a line-feed."""
    for line in src:
        w.write('\t'.join(_split_fields(line)))
        w.write('\n')


def seems_url(s: str) -> bool:
    """Check if a string starts with one of the URI schemes supported."""
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    return s.startswith(protocols)


def handle_input(w, r: Iterable[str], outfmt: str) -> None:
    """Convert the TSV lines from r into the output format named, writing
    the result to w.

    The format name must be a normalized one, either 'json' or 'jsons'.

    Raises ValueError when the output format isn't supported.
    """

    fmt2handler = {
        'json': to_json,
        'jsons': to_jsons,
    }

    if outfmt not in fmt2handler:
        raise ValueError(f'unsupported output format {outfmt}')
    fmt2handler[outfmt](w, r)


# fmt_names normalizes output-format names, and even has `self-aliases`,
# so a membership check also checks if a name is supported
fmt_names = {
    'j': 'json',
    'json': 'json',
    'jsons': 'jsons',
    'jsonstr': 'jsons',
    'jsonstrings': 'jsons',
    'json-s': 'jsons',
    'json-str': 'jsons',
    'json-strings': 'jsons',
}


def main() -> None:
    """Run the command-line app, using the arguments given to the script.

    The exit code is 0 on success (or when showing the help message), 1 on
    errors, and 2 when interrupted from the keyboard.
    """

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    args = argv[1:]

    # default output format is JSONS, since it can't accidentally mangle data
    out_format = fmt_names['jsons']

    # handle leading output-format option, if present
    if len(args) > 0 and args[0].startswith('-'):
        s = args[0].lstrip('-').lower()
        if s in fmt_names:
            out_format = fmt_names[s]
            # skip leading arg, since it's clearly not a filepath
            args = args[1:]

    try:
        if len(args) == 0:
            path = '-'
        elif len(args) == 1:
            path = args[0]
        else:
            raise ValueError('multiple inputs not allowed')

        if path == '-':
            handle_input(stdout, stdin, out_format)
        elif seems_url(path):
            from urllib.request import urlopen
            with urlopen(path) as inp:
                # can't use byte-sources directly during string-processing
                with TextIOWrapper(inp, encoding='utf-8') as txt:
                    handle_input(stdout, txt, out_format)
        else:
            with open(path, encoding='utf-8') as inp:
                handle_input(stdout, inp, out_format)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()