#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from json import load, dump
from sys import argv, exit, stderr, stdin, stdout


info = '''
jsonl [filepath/URI...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath or URI to load, standard input is used instead.
Every output line is always a single top-level item from the input.
'''


def jsonl(w, src) -> None:
    """Load one JSON document from `src` and write it to `w` as JSON lines.

    `src` is a readable file-like object handed to `json.load`; `w` is a
    text writer. A top-level array produces one output line per item, any
    other top-level value produces exactly one line.

    Raises whatever `json.load` raises on invalid input, and ValueError
    from `emit` when a value holds a NaN or an infinity.
    """
    data = load(src)
    if isinstance(data, (list, tuple)):
        for v in data:
            emit(w, v)
    else:
        emit(w, data)


def emit(w, v) -> None:
    """Write value `v` to `w` as a single-line JSON text, then a line-feed.

    Raises ValueError for NaN and infinities, since `allow_nan=False`
    refuses to output those non-standard JSON values.
    """
    dump(v, w, indent=None, allow_nan=False, separators=(', ', ': '))
    w.write('\n')


def seems_url(s: str) -> bool:
    """Tell whether `s` starts with one of the URI schemes handled here."""
    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    # str.startswith accepts a tuple of prefixes, so no explicit loop needed
    return s.startswith(protocols)


def main() -> int:
    """Run the command-line app using `sys.argv`, returning its exit code.

    Exit codes are 0 on success (including the help message and a closed
    output pipe), 1 on any error, and 2 when interrupted from the keyboard.
    """
    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip())
        return 0

    try:
        if len(argv) < 2:
            jsonl(stdout, stdin.buffer)
        elif len(argv) == 2:
            name = argv[1]
            if name == '-':
                jsonl(stdout, stdin.buffer)
            elif seems_url(name):
                # only pay for loading the URL machinery when it's needed
                from urllib.request import urlopen
                with urlopen(name) as inp:
                    jsonl(stdout, inp)
            else:
                with open(name, mode='rb') as inp:
                    jsonl(stdout, inp)
        else:
            raise ValueError('multiple inputs not allowed')

        # flush while still inside the try: short outputs stay buffered, so
        # without this a closed pipe would only be noticed during interpreter
        # shutdown, beyond the reach of the BrokenPipeError handler below
        stdout.flush()
        return 0
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
        return 0
    except KeyboardInterrupt:
        return 2
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        return 1


if __name__ == '__main__':
    exit(main())