File: dejson.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2020-2025 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from json import load, dump
  27 from sys import argv, stderr, stdin, stdout
  28 
  29 
  30 info = '''
  31 dejson [filepath/URI...]
  32 
  33 
  34 This script converts away from JSON into other formats, like TSV tables,
  35 auto-detecting a sensible output format, if possible.
  36 '''
  37 
  38 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  39     print(info.strip())
  40     exit(0)
  41 
  42 
  43 def dejson(w, data):
  44     if isinstance(data, (bool, float, int, str)):
  45         print(data, file=w)
  46         return
  47     if isinstance(data, (list, tuple)):
  48         handle_array(w, data)
  49         return
  50 
  51 
  52 def handle_array(w, data):
  53     keys = table_keys(data)
  54     if len(keys) > 0:
  55         handle_tsv(w, data)
  56         return
  57 
  58     for e in data:
  59         if isinstance(e, dict):
  60             dump(e, w)
  61 
  62 
  63 def table_keys(data):
  64     if not isinstance(data, (list, tuple)):
  65         return tuple()
  66     if len(data) == 0:
  67         return tuple()
  68 
  69     keys = {}
  70     for row in data:
  71         if not isinstance(row, dict):
  72             return tuple()
  73         for k in row.keys():
  74             keys[k] = None
  75     return tuple(keys.keys())
  76 
  77 
  78 def handle_tsv(w, data):
  79     check_tsv_cells(data[0].keys())
  80     print('\t'.join(data[0].keys()), file=w)
  81     for row in data:
  82         check_tsv_cells(row.values())
  83         print('\t'.join((fix_cell(e) for e in row.values())), file=w)
  84 
  85 
  86 def fix_cell(data):
  87     if isinstance(data, (list, tuple)):
  88         return ','.join(str(e) for e in data)
  89     return str(data) if not data is None else ''
  90 
  91 
  92 def check_tsv_cells(values):
  93     for e in values:
  94         if e is None or isinstance(e, (bool, int, float)):
  95             continue
  96 
  97         if isinstance(e, str):
  98             if any(b in e for b in ('\t', '\n', '\r', '\v', '\f')):
  99                 raise ValueError('string has TSV-incompatible symbols')
 100             continue
 101 
 102         if isinstance(e, (list, tuple)):
 103             for v in e:
 104                 if not isinstance(v, (bool, int, float, str)):
 105                     tsv_type_error(v)
 106                 if isinstance(v, str) and ',' in v:
 107                     m = 'can\'t join arrays with strings with commas in them'
 108                     raise ValueError(m)
 109             continue
 110 
 111         tsv_type_error(e)
 112 
 113 
 114 def tsv_type_error(e):
 115     raise ValueError(f'can\'t convert values of type {type(e)} into TSV')
 116 
 117 
 118 def seems_url(s: str) -> bool:
 119     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 120     return any(s.startswith(p) for p in protocols)
 121 
 122 
 123 try:
 124     if len(argv) > 2:
 125         raise ValueError('multiple inputs not allowed')
 126 
 127     if len(argv) < 2 or argv[1] == '-':
 128         dejson(stdout, load(stdin))
 129     else:
 130         name = argv[1]
 131         if seems_url(name):
 132             from urllib.request import urlopen
 133             with urlopen(name) as inp:
 134                 dejson(stdout, load(inp))
 135         else:
 136             with open(name, encoding='utf-8') as inp:
 137                 dejson(stdout, load(inp))
 138 except BrokenPipeError:
 139     # quit quietly, instead of showing a confusing error message
 140     stderr.close()
 141     exit(0)
 142 except KeyboardInterrupt:
 143     exit(2)
 144 except Exception as e:
 145     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 146     exit(1)