#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


from json import load, dump
from sys import argv, exit, stderr, stdin, stdout


# help message shown (after stripping outer whitespace) by the help options
# NOTE(review): the `...` in the usage line suggests several inputs, but
# main() below accepts at most one and rejects more with an error
info = '''
jsonl [filepath/URI...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath or URI to load, standard input is used instead.
Every output line is always a single top-level item from the input.
'''


def jsonl(w, src) -> None:
    """Parse one JSON document from `src` and write it to `w` as JSON lines.

    `src` is a readable file-like object handed to `json.load`; `w` is a
    writable text stream. A top-level list (or tuple) produces one output
    line per item, in order; any other top-level value produces exactly one
    line. The whole input is parsed before anything is written.

    Whatever `json.load` and `emit` raise (e.g. on invalid JSON input)
    propagates to the caller.
    """

    data = load(src)
    # treat a non-array value as if it were an array with a single item
    items = data if isinstance(data, (list, tuple)) else (data,)
    for v in items:
        emit(w, v)


def emit(w, v) -> None:
    """Write value `v` to text stream `w` as a single line of JSON.

    The value is serialized without indentation, then followed by a
    line-feed. Since `allow_nan` is disabled, `json.dump` raises a
    ValueError for NaN and infinite floats, instead of emitting invalid JSON.
    """

    dump(v, w, indent=None, allow_nan=False, separators=(', ', ': '))
    w.write('\n')


def seems_url(s: str) -> bool:
    """Tell whether `s` starts with one of the supported URI prefixes."""

    protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
    # str.startswith checks all prefixes in the tuple with a single call
    return s.startswith(protocols)


def main() -> None:
    """Run the command-line tool, using the arguments in `argv`.

    With no argument, or with a single `-`, the input is standard input;
    a single argument starting with a supported URI prefix is opened with
    `urlopen`; any other single argument is opened as a file path.

    Exit codes: 0 after showing the help message, 2 when interrupted from
    the keyboard, 1 on any other error, which includes being given more than
    one input.
    """

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info.strip(), file=stderr)
        exit(0)

    try:
        if len(argv) > 2:
            raise ValueError('multiple inputs not allowed')

        # a missing name means the same as an explicit dash: use stdin
        name = argv[1] if len(argv) == 2 else '-'

        if name == '-':
            # binary stdin: json.load can decode the bytes by itself
            jsonl(stdout, stdin.buffer)
        elif seems_url(name):
            # imported only when needed, as most runs never load URIs
            from urllib.request import urlopen
            with urlopen(name) as inp:
                jsonl(stdout, inp)
        else:
            with open(name, mode='rb') as inp:
                jsonl(stdout, inp)
    except BrokenPipeError:
        # quit quietly, instead of showing a confusing error message
        stderr.close()
    except KeyboardInterrupt:
        exit(2)
    except Exception as e:
        # show the error message in red, using ANSI-style codes
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


# only run the tool when used as a script, so that importing this file
# neither reads any input nor quits the importing program
if __name__ == '__main__':
    main()