File: ungron.py
   1 #!/usr/bin/python3
   2 
   3 # The MIT License (MIT)
   4 #
   5 # Copyright © 2024 pacman64
   6 #
   7 # Permission is hereby granted, free of charge, to any person obtaining a copy
   8 # of this software and associated documentation files (the “Software”), to deal
   9 # in the Software without restriction, including without limitation the rights
  10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11 # copies of the Software, and to permit persons to whom the Software is
  12 # furnished to do so, subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be included in
  15 # all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24 
  25 
  26 # ungron [options...] [filepath/URI...]
  27 #
  28 # UNGRON converts unstyled `gron` (GRep jsON) text back into valid JSON.
  29 # Unlike the original `gron`, there's no sort-mode.
  30 #
  31 # For some context, the original `gron` is at
  32 # https://github.com/tomnomnom/gron
  33 #
  34 # Options, where leading double-dashes are also allowed:
  35 #
  36 #     -h         show this help message
  37 #     -help      show this help message
  38 #
  39 #     -m         monochrome (default), enables unstyled input-mode
  40 #     -c         color, enables ANSI-styled input-mode
  41 #     -color     enables ANSI-styled input-mode
  42 
  43 
  44 from json import dump, loads
  45 from re import compile as compile_re, sub
  46 from sys import argv, exit, stderr, stdin, stdout
  47 from typing import Any, List, Tuple, Union
  48 from urllib.request import urlopen
  49 
  50 
# info is the help message: it's shown on stderr when the script is run with
# a single help option (`-h`, `--h`, `-help`, or `--help`); the text is kept
# in sync by hand with the comment at the top of this file
info = '''
ungron [options...] [filepath/URI...]

UNGRON converts unstyled `gron` (GRep jsON) text back into valid JSON.
Unlike the original `gron`, there's no sort-mode.

For some context, the original `gron` is at
https://github.com/tomnomnom/gron

Options, where leading double-dashes are also allowed:

    -h         show this help message
    -help      show this help message

    -m         monochrome (default), enables unstyled input-mode
    -c         color, enables ANSI-styled input-mode
    -color     enables ANSI-styled input-mode
'''.strip()
  70 
  71 # handle standard help cmd-line options, quitting right away in that case
  72 if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
  73     print(info, file=stderr)
  74     exit(0)
  75 
  76 
  77 # ansi_re matches `gron`-style ANSI-codes, so they can be removed
  78 ansi_re = compile_re('\x1b\\[[^m]+m')
  79 
  80 
  81 def parse_gron_line(line: str) -> (List[Union[int, str]], Any):
  82     path = []
  83     end_path = -1
  84     start_value = -1
  85     end_value = -1
  86 
  87     if line.endswith(';\n'):
  88         end_value = len(line) - 2
  89     elif line.endswith(';\r\n'):
  90         end_value = len(line) - 3
  91     elif line.endswith(';'):
  92         end_value = len(line) - 1
  93 
  94     if end_value < 0:
  95         line = line.rstrip('\r\n').rstrip('\n')
  96         msg = f'invalid gron line, without trailing semicolon: {line}'
  97         raise ValueError(msg)
  98 
  99     start = 0
 100     cur = 0
 101     while cur < end_value:
 102         c = line[cur]
 103 
 104         # handle start/end of unquoted object keys
 105         if c == '.':
 106             # some dots appear right after `]`s, so avoid empty keys
 107             # when that's the case
 108             if cur > start + 1:
 109                 path.append(line[start:cur])
 110             cur += 1
 111             start = cur
 112             continue
 113 
 114         # handle start of quoted object keys and/or array indices
 115         if c == '[':
 116             # some dots appear right after `]`s, so avoid empty keys
 117             # when that's the case
 118             if cur > start + 1:
 119                 path.append(line[start:cur])
 120             cur += 1
 121             i = find_end_bracket(line, cur)
 122             if i < 1:
 123                 raise ValueError(f'invalid gron line {line}')
 124 
 125             p = loads(line[cur:i])
 126             path.append(p)
 127             cur = i + 1
 128             start = cur
 129             continue
 130 
 131         # detect the end of the path part
 132         if c == ' ' and line.startswith(' = ', cur):
 133             end_path = cur
 134             start_value = end_path + len(' = ')
 135             break
 136         if c == '=':
 137             end_path = cur
 138             start_value = end_path + 1
 139             break
 140 
 141         # give useful error message if input is ANSI-styled
 142         if c == '\x1b' and line.startswith('\x1b[', cur):
 143             raise ValueError('invalid gron line has ANSI-code styles in it')
 144 
 145         # go over symbols for unquoted object keys
 146         cur += 1
 147 
 148     if end_path < 0:
 149         line = line.rstrip('\r\n').rstrip('\n')
 150         raise ValueError(f'invalid gron line, missing `=`: {line}')
 151 
 152     # don't forget the path's last item
 153     if cur > start + 1:
 154         path.append(line[start:cur])
 155 
 156     return path, loads(line[start_value:end_value])
 157 
 158 
 159 def find_end_bracket(line: str, start: int) -> int:
 160     escaped = False
 161     quoted = False
 162 
 163     for i in range(start, len(line)):
 164         c = line[i]
 165 
 166         if escaped:
 167             escaped = False
 168             continue
 169         if c == '\\':
 170             escaped = True
 171             continue
 172 
 173         if c == '"':
 174             quoted = not quoted
 175             continue
 176         if c == ']':
 177             if not quoted:
 178                 return i
 179     return -1
 180 
 181 
 182 def update(dest: Any, path: List[Union[int, str]], value: Any) -> None:
 183     if len(path) == 0:
 184         raise ValueError('empty path in gron line')
 185 
 186     for i, p in enumerate(path):
 187         # ensure leading key is always `json`
 188         if i == 0:
 189             if p != 'json':
 190                 raise ValueError(f'leading key is {p}, instead of "json"')
 191             continue
 192 
 193         # handle array-index access
 194         if isinstance(p, int):
 195             if not isinstance(dest, list):
 196                 raise ValueError(f'can\'t index a {type(dest)}, only lists')
 197 
 198             # forbid explicit negative indices, which should never appear
 199             # in `gron`-style output anyway
 200             if p < 0:
 201                 raise ValueError(f'invalid index {p}')
 202 
 203             # expand array to include the index given, if needed
 204             if p == len(dest):
 205                 dest.append(None)
 206             elif p >= len(dest):
 207                 n = p - len(dest) + 1
 208                 dest.extend([None] * n)
 209 
 210             if i < len(path) - 1:
 211                 dest = dest[p]
 212             else:
 213                 # finally put the value given in its intended place
 214                 dest[p] = value
 215             continue
 216 
 217         # handle object-key access
 218         if isinstance(p, str):
 219             if not isinstance(dest, dict):
 220                 raise ValueError(f'can\'t key a {type(dest)}, only objects')
 221 
 222             # expand dictionary to include the key given, if needed
 223             if not p in dest and i < len(path) - 1:
 224                 k = path[i + 1]
 225                 if isinstance(k, int):
 226                     dest[p] = []
 227                 elif isinstance(k, str):
 228                     dest[p] = {}
 229 
 230             if i < len(path) - 1:
 231                 dest = dest[p]
 232             else:
 233                 # finally put the value given in its intended place
 234                 dest[p] = value
 235             continue
 236 
 237         # this should never happen
 238         raise ValueError(f'invalid key/index type {type(p)}')
 239 
 240 
 241 def convert(w, src, color_mode: bool) -> None:
 242     '''This func is where all the recursive output-action starts/happens.'''
 243 
 244     # data holds the repeatedly-updated `dumpable` value for the JSON-output
 245     data = None
 246 
 247     for line in src:
 248         if color_mode:
 249             # handle colored `gron`, by ignoring all ANSI-styles on the line
 250             line = sub(ansi_re, '', line)
 251 
 252         # ignore empty(ish) lines
 253         if line.strip() == '':
 254             continue
 255 
 256         path, value = parse_gron_line(line)
 257         # continue
 258 
 259         # handle top-level assignment
 260         if len(path) == 1 and path[0] == 'json':
 261             data = value
 262             continue
 263 
 264         # handle other updates
 265         update(data, path, value)
 266 
 267     # emit output as valid JSON
 268     dump(data, w, indent=2)
 269     w.write('\n')
 270 
 271 
 272 def seems_url(s: str) -> bool:
 273     for prot in ('https://', 'http://', 'file://', 'ftp://', 'data:'):
 274         if s.startswith(prot):
 275             return True
 276     return False
 277 
 278 
 279 # handle leading options
 280 start_args = 1
 281 color_mode = False
 282 while start_args < len(argv) and argv[start_args].startswith('-'):
 283     l = argv[start_args].lstrip('-').lower()
 284     if l in ('c', 'color'):
 285         color_mode = True
 286         start_args += 1
 287         continue
 288     if l in ('m'):
 289         color_mode = False
 290         start_args += 1
 291         continue
 292     break
 293 args = argv[start_args:]
 294 
 295 try:
 296     stdout.reconfigure(newline='\n', encoding='utf-8')
 297 
 298     if len(args) == 0:
 299         stdin.reconfigure(encoding='utf-8')
 300         convert(stdout, stdin, color_mode)
 301     elif len(args) == 1:
 302         name = args[0]
 303         if name == '-':
 304             stdin.reconfigure(encoding='utf-8')
 305             convert(stdout, stdin, color_mode)
 306         elif seems_url(name):
 307             with urlopen(name) as inp:
 308                 convert(stdout, inp, color_mode)
 309         else:
 310             with open(name, encoding='utf-8') as inp:
 311                 convert(stdout, inp, color_mode)
 312     else:
 313         raise ValueError('multiple inputs not allowed')
 314 except (BrokenPipeError, KeyboardInterrupt):
 315     # quit quietly, instead of showing a confusing error message
 316     stderr.flush()
 317     stderr.close()
 318 except Exception as e:
 319     print(f'\x1b[31m{e}\x1b[0m', file=stderr)
 320     exit(1)