File: dejson.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 from json import load, dump
  27 from sys import argv, stderr, stdin, stdout
  28 
  29 
  30 info = '''
  31 dejson [filepath/URI...]
  32 
  33 
  34 This script converts away from JSON into other formats, like TSV tables,
  35 auto-detecting a sensible output format, if possible.
  36 '''
  37 
  38 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  39     print(info.strip())
  40     exit(0)
  41 
  42 
  43 def table_keys(data):
  44     if not isinstance(data, (list, tuple)):
  45         return tuple()
  46     if len(data) == 0:
  47         return tuple()
  48 
  49     keys = {}
  50     for row in data:
  51         if not isinstance(row, dict):
  52             return tuple()
  53         for k in row.keys():
  54             keys[k] = None
  55     return tuple(keys.keys())
  56 
  57 
  58 def dejson(w, data):
  59     if isinstance(data, (bool, float, int, str)):
  60         print(data, file=w)
  61         return
  62     if isinstance(data, (list, tuple)):
  63         handle_array(w, data)
  64         return
  65 
  66 
  67 def handle_array(w, data):
  68     keys = table_keys(data)
  69     if len(keys) > 0:
  70         handle_tsv(w, data)
  71         return
  72 
  73     for e in data:
  74         if isinstance(e, dict):
  75             dump(e, w)
  76         else:
  77             print(e, file=w)
  78 
  79 
  80 def fix_cell(data):
  81     if isinstance(data, (list, tuple)):
  82         return ','.join(str(e) for e in data)
  83     return str(data) if not data is None else ''
  84 
  85 
  86 def handle_tsv(w, data):
  87     check_tsv_cells(data[0].keys())
  88     print('\t'.join(data[0].keys()), file=w)
  89     for row in data:
  90         check_tsv_cells(row.values())
  91         print('\t'.join((fix_cell(e) for e in row.values())), file=w)
  92 
  93 
  94 def tsv_type_error(e):
  95     raise ValueError(f'can\'t convert values of type {type(e)} into TSV')
  96 
  97 
  98 def check_tsv_cells(values):
  99     for e in values:
 100         if e is None or isinstance(e, (bool, int, float)):
 101             continue
 102 
 103         if isinstance(e, str):
 104             if any(b in e for b in ('\t', '\n', '\r', '\v', '\f')):
 105                 raise ValueError('string has TSV-incompatible symbol')
 106             continue
 107 
 108         if isinstance(e, (list, tuple)):
 109             for v in e:
 110                 if not isinstance(v, (bool, int, float, str)):
 111                     tsv_type_error(v)
 112                 if isinstance(v, str) and ',' in v:
 113                     m = 'can\'t join arrays with strings with commas in them'
 114                     raise ValueError(m)
 115             continue
 116 
 117         tsv_type_error(e)
 118 
 119 
 120 def seems_url(s: str) -> bool:
 121     protocols = ('https://', 'http://', 'file://', 'ftp://', 'data:')
 122     return any(s.startswith(p) for p in protocols)
 123 
 124 
 125 try:
 126     if len(argv) < 2:
 127         dejson(stdout, load(stdin))
 128     elif len(argv) == 2:
 129         name = argv[1]
 130         if name == '-':
 131             dejson(stdout, load(stdin))
 132         elif seems_url(name):
 133             from urllib.request import urlopen
 134             with urlopen(name) as inp:
 135                 dejson(stdout, load(inp))
 136         else:
 137             with open(name, encoding='utf-8') as inp:
 138                 dejson(stdout, load(inp))
 139     else:
 140         raise ValueError('multiple inputs not allowed')
 141 except BrokenPipeError:
 142     # quit quietly, instead of showing a confusing error message
 143     stderr.close()
 144     exit(0)
 145 except KeyboardInterrupt:
 146     exit(2)
 147 except Exception as e:
 148     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 149     exit(1)