#!/usr/bin/python3

# The MIT License (MIT)
#
# Copyright © 2024 pacman64
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


# ungron [options...] [filepath/URI...]
#
# UNGRON converts unstyled `gron` (GRep jsON) text back into valid JSON.
# Unlike the original `gron`, there's no sort-mode.
#
# For some context, the original `gron` is at
# https://github.com/tomnomnom/gron
#
# Options, where leading double-dashes are also allowed:
#
#     -h          show this help message
#     -help       show this help message
#
#     -m          monochrome (default), enables unstyled input-mode
#     -c          color, enables ANSI-styled input-mode
#     -color      enables ANSI-styled input-mode


from json import dump, loads
from re import compile as compile_re, sub
from sys import argv, exit, stderr, stdin, stdout
from typing import Any, List, Tuple, Union
from urllib.request import urlopen


# info is the help message shown when asked to
info = '''
ungron [options...] [filepath/URI...]

UNGRON converts unstyled `gron` (GRep jsON) text back into valid JSON.
Unlike the original `gron`, there's no sort-mode.

For some context, the original `gron` is at
https://github.com/tomnomnom/gron

Options, where leading double-dashes are also allowed:

    -h          show this help message
    -help       show this help message

    -m          monochrome (default), enables unstyled input-mode
    -c          color, enables ANSI-styled input-mode
    -color      enables ANSI-styled input-mode
'''.strip()


# ansi_re matches `gron`-style ANSI-codes, so they can be removed
ansi_re = compile_re('\x1b\\[[^m]+m')


def parse_gron_line(line: str) -> Tuple[List[Union[int, str]], Any]:
    '''
    Split one gron line into its path and its JSON-decoded value.

    For example `json.a[0] = "x";` results in `(['json', 'a', 0], 'x')`.
    Unquoted keys are kept as strings, while bracketed items are decoded
    as JSON, so they come out as strings or numbers.

    The line may end with a line-feed or a carriage-return/line-feed pair,
    but must have a semicolon right before that. A ValueError is raised
    when the semicolon or the `=` are missing, when a bracket is never
    closed, when ANSI-styles are found in the path, or when any of the
    JSON snippets on the line are invalid.
    '''

    # find where the value ends, which is right before the final semicolon
    if line.endswith(';\n'):
        end_value = len(line) - 2
    elif line.endswith(';\r\n'):
        end_value = len(line) - 3
    elif line.endswith(';'):
        end_value = len(line) - 1
    else:
        line = line.rstrip('\r\n')
        msg = f'invalid gron line, without trailing semicolon: {line}'
        raise ValueError(msg)

    path: List[Union[int, str]] = []
    end_path = -1
    start_value = -1

    # start is where the current unquoted key begins, cur is the scan index
    start = 0
    cur = 0
    while cur < end_value:
        c = line[cur]

        # handle start/end of unquoted object keys
        if c == '.':
            # some dots appear right after `]`s, so avoid empty keys when
            # that's the case, while still keeping single-symbol keys
            if cur > start:
                path.append(line[start:cur])
            cur += 1
            start = cur
            continue

        # handle start of quoted object keys and/or array indices
        if c == '[':
            # brackets can follow other brackets, so avoid empty keys
            if cur > start:
                path.append(line[start:cur])
            cur += 1
            i = find_end_bracket(line, cur)
            if i < 0:
                raise ValueError(f'invalid gron line {line}')

            path.append(loads(line[cur:i]))
            cur = i + 1
            start = cur
            continue

        # detect the end of the path part
        if c == ' ' and line.startswith(' = ', cur):
            end_path = cur
            start_value = end_path + len(' = ')
            break
        if c == '=':
            end_path = cur
            start_value = end_path + 1
            break

        # give useful error message if input is ANSI-styled
        if c == '\x1b' and line.startswith('\x1b[', cur):
            raise ValueError('invalid gron line has ANSI-code styles in it')

        # go over symbols for unquoted object keys
        cur += 1

    if end_path < 0:
        line = line.rstrip('\r\n')
        raise ValueError(f'invalid gron line, missing `=`: {line}')

    # don't forget the path's last item, when it's an unquoted key
    if cur > start:
        path.append(line[start:cur])

    return path, loads(line[start_value:end_value])


def find_end_bracket(line: str, start: int) -> int:
    '''
    Find the index of the first `]` at/after the index given which isn't
    inside a double-quoted string: the result is -1 when there's none.

    Any symbol right after a backslash is skipped, so escaped double-quotes
    don't end quoted strings.
    '''

    escaped = False
    quoted = False

    for i in range(start, len(line)):
        c = line[i]

        if escaped:
            escaped = False
        elif c == '\\':
            escaped = True
        elif c == '"':
            quoted = not quoted
        elif c == ']' and not quoted:
            return i
    return -1


def _new_container(key: Any) -> Union[list, dict]:
    '''Make the empty list/object which the key/index given can go into.'''

    # bools are also ints, but make no sense as array indices
    if isinstance(key, bool) or not isinstance(key, (int, str)):
        raise ValueError(f'invalid key/index type {type(key)}')
    return [] if isinstance(key, int) else {}


def update(dest: Any, path: List[Union[int, str]], value: Any) -> None:
    '''
    Put a value inside the list/object given, following the path given.

    The first path item must be the string `json`, and stands for `dest`
    itself: later string items are object keys, and later integer items
    are array indices. Lists are padded with nulls to fit the indices
    used. Containers missing along the path are made as needed, either
    when an object lacks the key, or when a list slot holds a null.

    Paths with only the leading `json` item change nothing, since callers
    are supposed to handle top-level assignments themselves.

    A ValueError is raised for empty paths, wrong leading keys, negative
    indices, path items which are neither strings nor integers, and for
    keys/indices which don't match the type of container being accessed.
    '''

    if len(path) == 0:
        raise ValueError('empty path in gron line')

    # ensure leading key is always `json`
    if path[0] != 'json':
        raise ValueError(f'leading key is {path[0]}, instead of "json"')

    last = len(path) - 1
    for i in range(1, len(path)):
        p = path[i]

        # bools are also ints, but make no sense as array indices
        if isinstance(p, bool) or not isinstance(p, (int, str)):
            raise ValueError(f'invalid key/index type {type(p)}')

        if isinstance(p, int):
            # handle array-index access
            if not isinstance(dest, list):
                raise ValueError(f'can\'t index a {type(dest)}, only lists')

            # forbid explicit negative indices, which should never appear
            # in `gron`-style output anyway
            if p < 0:
                raise ValueError(f'invalid index {p}')

            # expand array to include the index given, if needed
            if p >= len(dest):
                dest.extend([None] * (p - len(dest) + 1))

            # null slots are placeholders from padding, so they count as
            # missing when the path keeps going
            missing = dest[p] is None
        else:
            # handle object-key access
            if not isinstance(dest, dict):
                raise ValueError(f'can\'t key a {type(dest)}, only objects')
            missing = p not in dest

        if i == last:
            # finally put the value given in its intended place
            dest[p] = value
            return

        # the next path item decides which kind of container is needed
        if missing:
            dest[p] = _new_container(path[i + 1])
        dest = dest[p]


def convert(w, src, color_mode: bool) -> None:
    '''
    Turn all gron lines from the source given into a single JSON value,
    and write it as indented JSON, followed by a line-feed.

    The source can be any iterable of lines: lines given as bytes, such as
    the ones from URI responses, are decoded as UTF-8. In color mode, all
    ANSI-styles are removed before parsing each line. Blank lines are
    ignored. When no line sets a value, the output is null.
    '''

    # data holds the repeatedly-updated `dumpable` value for the JSON-output
    data = None

    for line in src:
        # responses from `urlopen` give bytes, instead of strings
        if isinstance(line, bytes):
            line = line.decode('utf-8')

        if color_mode:
            # handle colored `gron`, by ignoring all ANSI-styles on the line
            line = sub(ansi_re, '', line)

        # ignore empty(ish) lines
        if line.strip() == '':
            continue

        path, value = parse_gron_line(line)

        # handle top-level assignment
        if len(path) == 1 and path[0] == 'json':
            data = value
            continue

        # filtered `gron` lines may lack the top-level assignment, so make
        # the top-level container the first deeper path needs
        if data is None and len(path) > 1:
            data = _new_container(path[1])

        # handle other updates
        update(data, path, value)

    # emit output as valid JSON
    dump(data, w, indent=2)
    w.write('\n')


def seems_url(s: str) -> bool:
    '''Check if the string given starts with a supported URI protocol.'''

    return s.startswith(('https://', 'http://', 'file://', 'ftp://', 'data:'))


def main() -> None:
    '''Handle the cmd-line arguments, and convert the input they imply.'''

    # handle standard help cmd-line options, quitting right away in that case
    if len(argv) == 2 and argv[1] in ('-h', '--h', '-help', '--help'):
        print(info, file=stderr)
        exit(0)

    # handle leading options
    start_args = 1
    color_mode = False
    while start_args < len(argv) and argv[start_args].startswith('-'):
        arg = argv[start_args]

        # a double-dash explicitly ends the options
        if arg == '--':
            start_args += 1
            break

        # a single dash isn't an option, as it ends up matching nothing
        # here: it's a name which stands for the standard input
        opt = arg.lstrip('-').lower()
        if opt in ('c', 'color'):
            color_mode = True
        elif opt == 'm':
            color_mode = False
        else:
            break
        start_args += 1
    args = argv[start_args:]

    try:
        stdout.reconfigure(newline='\n', encoding='utf-8')

        if len(args) > 1:
            raise ValueError('multiple inputs not allowed')

        name = args[0] if len(args) == 1 else '-'
        if name == '-':
            stdin.reconfigure(encoding='utf-8')
            convert(stdout, stdin, color_mode)
        elif seems_url(name):
            with urlopen(name) as inp:
                convert(stdout, inp, color_mode)
        else:
            with open(name, encoding='utf-8') as inp:
                convert(stdout, inp, color_mode)
    except (BrokenPipeError, KeyboardInterrupt):
        # quit quietly, instead of showing a confusing error message
        stderr.flush()
        stderr.close()
    except Exception as e:
        print(f'\x1b[31m{e}\x1b[0m', file=stderr)
        exit(1)


if __name__ == '__main__':
    main()