File: json2.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./json2 ./json2.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdarg.h>
  33 #include <stdbool.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #ifdef RED_ERRORS
  45 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  46 #ifdef __APPLE__
  47 #define ERROR_STYLE "\x1b[31m"
  48 #endif
  49 #define RESET_STYLE "\x1b[0m"
  50 #else
  51 #define ERROR_STYLE
  52 #define RESET_STYLE
  53 #endif
  54 
  55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  56 
  57 #ifndef IBUF_SIZE
  58 #define IBUF_SIZE (32 * 1024)
  59 #endif
  60 
  61 #ifndef OBUF_SIZE
  62 #define OBUF_SIZE (8 * 1024)
  63 #endif
  64 
  65 const char* info = ""
  66 "json2 [options...] [file...]\n"
  67 "\n"
  68 "\n"
  69 "JSON-2 converts/fixes JSON/pseudo-JSON input into indented multi-line JSON\n"
  70 "which uses 2 spaces for each indentation level.\n"
  71 "\n"
  72 "Besides formatting JSON, this tool also adapts almost-JSON input into\n"
  73 "valid JSON, since it\n"
  74 "\n"
  75 "    - ignores both rest-of-line and multi-line comments\n"
  76 "    - ignores extra/trailing commas in arrays and objects\n"
  77 "    - turns single-quoted strings/keys into double-quoted strings\n"
  78 "    - double-quotes unquoted object keys\n"
  79 "    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
  80 "\n"
  81 "All options available can either start with a single or a double-dash\n"
  82 "\n"
  83 "    -h        show this help message\n"
  84 "    -help     show this help message\n"
  85 "";
  86 
  87 typedef struct j2_maker {
  88     FILE* in;
  89     FILE* out;
  90 
  91     unsigned char* ibuf;
  92     size_t ilen; // how many bytes are being used in the input buffer
  93     size_t icap; // the input buffer's capacity
  94     size_t ipos; // the current position in the input buffer
  95 
  96     size_t line; // the current line, used to show useful error messages
  97     size_t pos;  // the position in the current line, for error messages
  98 
  99     unsigned char* obuf;
 100     size_t ocap; // the output buffer's capacity
 101     size_t opos; // the current position in the output buffer
 102 
 103     ssize_t level; // the current indentation/nesting level
 104 
 105     int current;
 106     int next;
 107 } j2_maker;
 108 
 109 // advance_reader_pos helps func read_byte do its job
 110 static inline void advance_reader_pos(j2_maker* r, unsigned char b) {
 111     r->ipos++;
 112     if (b == '\n') {
 113         r->line++;
 114         r->pos = 1;
 115     } else {
 116         r->pos++;
 117     }
 118 }
 119 
 120 // read_byte does as it says: check its return for the value EOF, before
 121 // using it as the next byte
 122 static inline int read_byte(j2_maker* r) {
 123     if (r->ipos < r->ilen) {
 124         // inside current chunk
 125         const unsigned char b = r->ibuf[r->ipos];
 126         advance_reader_pos(r, b);
 127         return b;
 128     }
 129 
 130     // need to read the next block
 131     r->ipos = 0;
 132     r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
 133     if (r->ilen > 0) {
 134         const unsigned char b = r->ibuf[r->ipos];
 135         advance_reader_pos(r, b);
 136         return b;
 137     }
 138 
 139     // reached the end of data
 140     return EOF;
 141 }
 142 
 143 // advance is used in most of the code, instead of calling read_byte directly
 144 static inline void advance(j2_maker* r) {
 145     r->current = r->next;
 146     r->next = read_byte(r);
 147 }
 148 
 149 void fail(j2_maker* m, int code, const char* msg);
 150 
 151 void skip_line(j2_maker* r) {
 152     while (true) {
 153         advance(r);
 154         const int lead = r->current;
 155 
 156         if (lead == EOF) {
 157             break;
 158         }
 159 
 160         if (lead == '\n') {
 161             advance(r);
 162             break;
 163         }
 164     }
 165 }
 166 
 167 void skip_multiline_comment(j2_maker* r) {
 168     unsigned char prev = 0;
 169 
 170     while (true) {
 171         advance(r);
 172         const int lead = r->current;
 173 
 174         if (lead == EOF) {
 175             break;
 176         }
 177 
 178         if (prev == '*' && lead == '/') {
 179             advance(r);
 180             break;
 181         }
 182 
 183         prev = (unsigned char)lead;
 184     }
 185 }
 186 
 187 void skip_comment(j2_maker* r) {
 188     int lead = r->current;
 189 
 190     if (lead == '#') {
 191         skip_line(r);
 192         return;
 193     }
 194 
 195     if (lead != '/') {
 196         fail(r, 1, "expected a slash to start comments");
 197     }
 198 
 199     advance(r);
 200     lead = r->current;
 201 
 202     if (lead == '/') {
 203         skip_line(r);
 204         return;
 205     }
 206 
 207     if (lead == '*') {
 208         skip_multiline_comment(r);
 209         return;
 210     }
 211 
 212     fail(r, 1, "expected `//` or `/*` to start comments");
 213 }
 214 
 215 static inline void seek_token(j2_maker* r) {
 216     while (true) {
 217         const int lead = r->current;
 218 
 219         if (lead != EOF && lead <= ' ') {
 220             advance(r);
 221             continue;
 222         }
 223 
 224         if (lead == '/' || lead == '#') {
 225             skip_comment(r);
 226             continue;
 227         }
 228 
 229         break;
 230     }
 231 }
 232 
 233 bool starts_with_bom(const unsigned char* b, const size_t n) {
 234     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
 235 }
 236 
 237 void restart_state(j2_maker* m, FILE* w, FILE* r) {
 238     m->in = r;
 239     m->ilen = 0;
 240     m->ipos = 0;
 241 
 242     m->out = w;
 243     m->opos = 0;
 244 
 245     m->line = 1;
 246     m->pos = 1;
 247 
 248     m->current = EOF;
 249     m->next = EOF;
 250 
 251     m->current = read_byte(m);
 252     if (m->current == EOF) {
 253         return;
 254     }
 255     m->next = read_byte(m);
 256 
 257     m->level = 0;
 258 
 259     // skip leading UTF-8 BOM (byte-order mark), if present
 260     if (starts_with_bom(m->ibuf, m->ilen)) {
 261         // a UTF-8 BOM has 3 bytes
 262         for (size_t i = 0; i < 3 && m->current != EOF; i++) {
 263             advance(m);
 264         }
 265     }
 266 }
 267 
 268 void write_byte(j2_maker* m, unsigned char b) {
 269     if (m->opos < m->ocap) {
 270         m->obuf[m->opos++] = b;
 271         return;
 272     }
 273 
 274     fwrite(m->obuf, 1, m->ocap, m->out);
 275     m->obuf[0] = b;
 276     m->opos = 1;
 277 }
 278 
 279 // write_bytes does as it says, minimizing the number of calls to fwrite
 280 void write_bytes(j2_maker* m, const unsigned char* src, size_t len) {
 281     const size_t rem = m->ocap - m->opos;
 282     if (len < rem) {
 283         memcpy(m->obuf + m->opos, src, len);
 284         m->opos += len;
 285         return;
 286     }
 287 
 288     for (size_t i = 0; i < len; i++) {
 289         write_byte(m, src[i]);
 290     }
 291 }
 292 
 293 void write_spaces(j2_maker* m, ssize_t n) {
 294     const unsigned char spaces[32] = "                                ";
 295     while (n > sizeof(spaces)) {
 296         write_bytes(m, spaces, sizeof(spaces));
 297         n -= sizeof(spaces);
 298     }
 299     if (n > 0) {
 300         write_bytes(m, spaces, n);
 301     }
 302 }
 303 
 304 static inline void indent(j2_maker* m) {
 305     write_spaces(m, 2 * m->level);
 306 }
 307 
 308 void flush(j2_maker* m) {
 309     if (m->opos > 0) {
 310         fwrite(m->obuf, 1, m->opos, m->out);
 311     }
 312     m->opos = 0;
 313     fflush(m->out);
 314 }
 315 
 316 // https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
 317 
 318 static inline bool check_2_byte_rune(int a, int b) {
 319     return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
 320 }
 321 
 322 bool check_3_byte_rune(int a, int b, int c) {
 323     return (
 324         (a == 0xe0) &&
 325         (0xa0 <= b && b <= 0xbf) &&
 326         (0x80 <= c && c <= 0xbf)
 327     ) || (
 328         (0xe1 <= a && a <= 0xec) &&
 329         (0x80 <= b && b <= 0xbf) &&
 330         (0x80 <= c && c <= 0xbf)
 331     ) || (
 332         (a == 0xed) &&
 333         (0x80 <= b && b <= 0x9f) &&
 334         (0x80 <= c && c <= 0xbf)
 335     ) || (
 336         (a == 0xee || a == 0xef) &&
 337         (0x80 <= b && b <= 0xbf) &&
 338         (0x80 <= c && c <= 0xbf)
 339     );
 340 }
 341 
 342 bool check_4_byte_rune(int a, int b, int c, int d) {
 343     return (
 344         (a == 0xf0) &&
 345         (0x90 <= b && b <= 0xbf) &&
 346         (0x80 <= c && c <= 0xbf) &&
 347         (0x80 <= d && d <= 0xbf)
 348     ) || (
 349         (a == 0xf1 || a == 0xf3) &&
 350         (0x80 <= b && b <= 0xbf) &&
 351         (0x80 <= c && c <= 0xbf) &&
 352         (0x80 <= d && d <= 0xbf)
 353     ) || (
 354         (a == 0xf4) &&
 355         (0x80 <= b && b <= 0xbf) &&
 356         (0x80 <= c && c <= 0x8f) &&
 357         (0x80 <= d && d <= 0xbf)
 358     );
 359 }
 360 
 361 // write_replacement_char is the recommended action to handle invalid bytes
 362 void write_replacement_char(j2_maker* m) {
 363     write_byte(m, 0xef);
 364     write_byte(m, 0xbf);
 365     write_byte(m, 0xbd);
 366 }
 367 
 368 void handle_invalid_rune(j2_maker* m) {
 369     // fail(m, 1, "invalid unicode value");
 370     write_replacement_char(m);
 371 }
 372 
 373 // write_rune is following the table at https://en.wikipedia.org/wiki/UTF-8
 374 void write_rune(j2_maker* m, uint32_t rune) {
 375     if (rune < (1 << 7)) {
 376         write_byte(m, rune);
 377         return;
 378     }
 379 
 380     if (rune < (1 << (5 + 6))) {
 381         const int a = 0b11000000 | (rune >> 6);
 382         const int b = 0b10000000 | (rune & 0b00111111);
 383         if (check_2_byte_rune(a, b)) {
 384             write_byte(m, a);
 385             write_byte(m, b);
 386         } else {
 387             write_replacement_char(m);
 388         }
 389         return;
 390     }
 391 
 392     if (rune < (1 << (4 + 6 + 6))) {
 393         const int a = 0b11100000 | (rune >> 12);
 394         const int b = 0b10000000 | ((rune >> 6) & 0b00111111);
 395         const int c = 0b10000000 | (rune & 0b00111111);
 396         if (check_3_byte_rune(a, b, c)) {
 397             write_byte(m, a);
 398             write_byte(m, b);
 399             write_byte(m, c);
 400         } else {
 401             write_replacement_char(m);
 402         }
 403         return;
 404     }
 405 
 406     if (rune < (1 << (3 + 6 + 6 + 6))) {
 407         const int a = 0b11110000 | (rune >> 18);
 408         const int b = 0b10000000 | ((rune >> 12) & 0b00111111);
 409         const int c = 0b10000000 | ((rune >> 6) & 0b00111111);
 410         const int d = 0b10000000 | (rune & 0b00111111);
 411         if (check_4_byte_rune(a, b, c, d)) {
 412             write_byte(m, a);
 413             write_byte(m, b);
 414             write_byte(m, c);
 415             write_byte(m, d);
 416         } else {
 417             write_replacement_char(m);
 418         }
 419         return;
 420     }
 421 
 422     write_replacement_char(m);
 423 }
 424 
 425 void copy_utf8_rune(j2_maker* m) {
 426     const int a = m->current;
 427 
 428     if (a == EOF) {
 429         return;
 430     }
 431 
 432     // handle 1-byte runes
 433     if (a < 128) {
 434         write_byte(m, a);
 435         return;
 436     }
 437 
 438     advance(m);
 439     const int b = m->current;
 440 
 441     if (b == EOF) {
 442         handle_invalid_rune(m);
 443         return;
 444     }
 445 
 446     // handle 2-byte runes
 447     if (check_2_byte_rune(a, b)) {
 448         write_byte(m, a);
 449         write_byte(m, b);
 450         return;
 451     }
 452 
 453     advance(m);
 454     const int c = m->current;
 455 
 456     if (c == EOF) {
 457         handle_invalid_rune(m);
 458         return;
 459     }
 460 
 461     // handle 3-byte runes
 462     if (check_3_byte_rune(a, b, c)) {
 463         write_byte(m, a);
 464         write_byte(m, b);
 465         write_byte(m, c);
 466         return;
 467     }
 468 
 469     advance(m);
 470     const int d = m->current;
 471 
 472     if (d == EOF) {
 473         handle_invalid_rune(m);
 474         return;
 475     }
 476 
 477     // handle 4-byte runes
 478     if (check_4_byte_rune(a, b, c, d)) {
 479         write_byte(m, a);
 480         write_byte(m, b);
 481         write_byte(m, c);
 482         write_byte(m, d);
 483         return;
 484     }
 485 
 486     handle_invalid_rune(m);
 487 }
 488 
 489 // debug is available to diagnose any bug found
 490 void debug(j2_maker* m, const char* fmt, ...) {
 491     va_list args;
 492     va_start(args, fmt);
 493 
 494     if (m->in != stdin) {
 495         fclose(m->in);
 496     }
 497 
 498     write_byte(m, '\n');
 499 
 500     const unsigned long line = m->line;
 501     const unsigned long pos = m->pos;
 502     fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
 503     fprintf(stderr, fmt, args);
 504     fprintf(stderr, "\x1b[0m\n");
 505 
 506     va_end(args);
 507 
 508     exit(10);
 509 }
 510 
 511 // fail quits this app right after showing the error message given
 512 void fail(j2_maker* m, int code, const char* msg) {
 513     const unsigned long line = m->line;
 514     const unsigned long pos = m->pos;
 515 
 516     write_byte(m, '\n');
 517     flush(m);
 518     fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
 519     exit(code);
 520 }
 521 
 522 bool demand_keyword(j2_maker* m, char* rest) {
 523     for (; rest[0] != 0; rest++) {
 524         const int lead = m->current;
 525         if (lead == EOF || lead != rest[0]) {
 526             return false;
 527         }
 528         advance(m);
 529     }
 530 
 531     return rest[0] == 0;
 532 }
 533 
 534 void handle_null(j2_maker* m) {
 535     if (!demand_keyword(m, "null")) {
 536         fail(m, 1, "expected `null` keyword");
 537     }
 538     write_bytes(m, (unsigned char*)"null", 4);
 539 }
 540 
 541 void handle_true(j2_maker* m) {
 542     if (!demand_keyword(m, "true")) {
 543         fail(m, 1, "expected `true` keyword");
 544     }
 545     write_bytes(m, (unsigned char*)"true", 4);
 546 }
 547 
 548 void handle_false(j2_maker* m) {
 549     if (!demand_keyword(m, "false")) {
 550         fail(m, 1, "expected `false` keyword");
 551     }
 552     write_bytes(m, (unsigned char*)"false", 5);
 553 }
 554 
 555 void handle_capital_none(j2_maker* m) {
 556     if (!demand_keyword(m, "None")) {
 557         fail(m, 1, "expected `None` keyword");
 558     }
 559     write_bytes(m, (unsigned char*)"null", 4);
 560 }
 561 
 562 void handle_capital_true(j2_maker* m) {
 563     if (!demand_keyword(m, "True")) {
 564         fail(m, 1, "expected `True` keyword");
 565     }
 566     write_bytes(m, (unsigned char*)"true", 4);
 567 }
 568 
 569 void handle_capital_false(j2_maker* m) {
 570     if (!demand_keyword(m, "False")) {
 571         fail(m, 1, "expected `False` keyword");
 572     }
 573     write_bytes(m, (unsigned char*)"false", 5);
 574 }
 575 
 576 void handle_digits(j2_maker* m) {
 577     if (!isdigit(m->current)) {
 578         fail(m, 1, "expected/missing digits");
 579     }
 580 
 581     while (isdigit(m->current)) {
 582         write_byte(m, m->current);
 583         advance(m);
 584     }
 585 }
 586 
 587 void handle_number(j2_maker* m) {
 588     handle_digits(m);
 589 
 590     const int lead = m->current;
 591 
 592     if (lead == '.') {
 593         write_byte(m, '.');
 594         advance(m);
 595 
 596         if (isdigit(m->current)) {
 597             handle_digits(m);
 598         } else {
 599             write_byte(m, '0');
 600         }
 601         return;
 602     }
 603 
 604     if (lead == 'e' || lead == 'E') {
 605         write_byte(m, lead);
 606         advance(m);
 607 
 608         if (m->current == '+') {
 609             advance(m);
 610         } else if (m->current == '-') {
 611             write_byte(m, '-');
 612             advance(m);
 613         }
 614 
 615         handle_digits(m);
 616     }
 617 }
 618 
 619 void handle_dot(j2_maker* m) {
 620     write_byte(m, '0');
 621     write_byte(m, '.');
 622     advance(m);
 623 
 624     if (!isdigit(m->current)) {
 625         fail(m, 1, "expected/missing digits after decimal dot");
 626     }
 627     handle_digits(m);
 628 }
 629 
 630 void handle_plus_number(j2_maker* m) {
 631     advance(m);
 632 
 633     if (m->current == '.') {
 634         handle_dot(m);
 635         return;
 636     }
 637     handle_number(m);
 638 }
 639 
 640 void handle_minus_number(j2_maker* m) {
 641     write_byte(m, '-');
 642     advance(m);
 643 
 644     if (m->current == '.') {
 645         handle_dot(m);
 646         return;
 647     }
 648     handle_number(m);
 649 }
 650 
 651 // decode_hex assumes valid hex digits, checked by func is_valid_hex
 652 uint32_t decode_hex(unsigned char hex) {
 653     if ('0' <= hex && hex <= '9') {
 654         return hex - '0';
 655     }
 656     if ('A' <= hex && hex <= 'F') {
 657         return hex - 'A' + 10;
 658     }
 659     if ('a' <= hex && hex <= 'f') {
 660         return hex - 'a' + 10;
 661     }
 662     return 0xffff;
 663 }
 664 
 665 static inline bool is_valid_hex(unsigned char b) {
 666     return false ||
 667         ('0' <= b && b <= '9') ||
 668         ('A' <= b && b <= 'F') ||
 669         ('a' <= b && b <= 'f');
 670 }
 671 
 672 // handle_low_char ensures characters whose ASCII codes are lower than spaces
 673 // are properly escaped for strings
 674 void handle_low_char(j2_maker* m, int c) {
 675     const char* hex = "0123456789ABCDEF";
 676 
 677     switch (c) {
 678     case '\t':
 679         write_byte(m, '\\');
 680         write_byte(m, 't');
 681         break;
 682     case '\n':
 683         write_byte(m, '\\');
 684         write_byte(m, 'n');
 685         break;
 686     case '\r':
 687         write_byte(m, '\\');
 688         write_byte(m, 'r');
 689         break;
 690     case '\b':
 691         write_byte(m, '\\');
 692         write_byte(m, 'b');
 693         break;
 694     case '\f':
 695         write_byte(m, '\\');
 696         write_byte(m, 'f');
 697         break;
 698     case '\v':
 699         write_byte(m, '\\');
 700         write_byte(m, 'v');
 701         break;
 702     default:
 703         write_byte(m, '\\');
 704         write_byte(m, 'u');
 705         write_byte(m, '0');
 706         write_byte(m, '0');
 707         write_byte(m, hex[c / 16]);
 708         write_byte(m, hex[c % 16]);
 709         break;
 710     }
 711 }
 712 
 713 void write_inner_string_hex_quad(j2_maker* m, const unsigned char quad[4]) {
 714     const uint32_t n = 0 +
 715         (decode_hex(quad[0]) << 12) +
 716         (decode_hex(quad[1]) << 8) +
 717         (decode_hex(quad[2]) << 4) +
 718         (decode_hex(quad[3]) << 0);
 719 
 720     switch (n) {
 721     case '"':
 722         write_byte(m, '\\');
 723         write_byte(m, '"');
 724         return;
 725     case '\\':
 726         write_byte(m, '\\');
 727         write_byte(m, '\\');
 728         return;
 729     }
 730 
 731     if (n >= ' ') {
 732         write_rune(m, n);
 733     } else {
 734         handle_low_char(m, n);
 735     }
 736 }
 737 
 738 void handle_hex_quad(j2_maker* m) {
 739     unsigned char quad[4];
 740     for (size_t i = 0; i < 4; i++) {
 741         advance(m);
 742         const int lead = m->current;
 743         if (lead == EOF) {
 744             fail(m, 1, "end of input before end of string");
 745         }
 746         if (is_valid_hex(lead)) {
 747             quad[i] = lead;
 748             continue;
 749         }
 750         fail(m, 1, "invalid hexadecimal digit in string");
 751     }
 752 
 753     write_inner_string_hex_quad(m, quad);
 754 }
 755 
 756 void handle_hex_pair(j2_maker* m) {
 757     unsigned char quad[4] = {'0', '0', '0', '0'};
 758     advance(m);
 759     const int a = m->current;
 760     advance(m);
 761     const int b = m->current;
 762     if (a == EOF || b == EOF) {
 763         fail(m, 1, "end of input before end of string");
 764     }
 765     if (!is_valid_hex(a) || !is_valid_hex(b)) {
 766         fail(m, 1, "invalid hexadecimal digit in string");
 767     }
 768 
 769     quad[2] = a;
 770     quad[3] = b;
 771     write_inner_string_hex_quad(m, quad);
 772 }
 773 
 774 void handle_string_escape(j2_maker* m, int c) {
 775     switch (c) {
 776     case '"':
 777     case '\\':
 778     case 'b':
 779     case 'f':
 780     case 'n':
 781     case 'r':
 782     case 't':
 783         write_byte(m, '\\');
 784         write_byte(m, c);
 785         break;
 786     case 'u':
 787         handle_hex_quad(m);
 788         break;
 789     case 'x':
 790         handle_hex_pair(m);
 791         break;
 792     case '\'':
 793         write_byte(m, '\'');
 794         break;
 795     default:
 796         write_byte(m, m->current);
 797         break;
 798     }
 799 }
 800 
 801 void handle_string(j2_maker* m) {
 802     const unsigned char quote = m->current;
 803     bool escaped = false;
 804 
 805     write_byte(m, '"');
 806 
 807     while (true) {
 808         advance(m);
 809 
 810         int c = m->current;
 811         if (c == EOF) {
 812             fail(m, 1, "input ended before string was close-quoted");
 813         }
 814 
 815         if (escaped) {
 816             handle_string_escape(m, c);
 817             escaped = false;
 818             continue;
 819         }
 820 
 821         switch (c) {
 822         case '\\':
 823             escaped = true;
 824             break;
 825         default:
 826             if (c == quote) {
 827                 write_byte(m, '"');
 828                 advance(m);
 829                 return;
 830             }
 831 
 832             // write_byte(m, c);
 833             if (c < ' ') {
 834                 handle_low_char(m, c);
 835             } else {
 836                 copy_utf8_rune(m);
 837             }
 838             break;
 839         }
 840     }
 841 }
 842 
 843 void handle_token(j2_maker* m, ssize_t lead_level);
 844 
 845 void handle_array(j2_maker* m) {
 846     size_t items = 0;
 847     const unsigned char end = m->current == '[' ? ']' : ')';
 848     m->level++;
 849     write_byte(m, '[');
 850     advance(m);
 851 
 852     while (true) {
 853         seek_token(m);
 854         const int lead = m->current;
 855 
 856         if (lead == EOF) {
 857             fail(m, 1, "unclosed array");
 858         }
 859 
 860         if (lead == ',') {
 861             advance(m);
 862             continue;
 863         }
 864 
 865         if (lead == end) {
 866             m->level--;
 867             if (items > 0) {
 868                 write_byte(m, '\n');
 869                 indent(m);
 870             }
 871             write_byte(m, ']');
 872             advance(m);
 873             return;
 874         }
 875 
 876         if (items > 0) {
 877             write_byte(m, ',');
 878         }
 879         write_byte(m, '\n');
 880         if (feof(m->out)) {
 881             return;
 882         }
 883         handle_token(m, m->level);
 884         items++;
 885     }
 886 }
 887 
 888 void handle_unquoted_key(j2_maker* m) {
 889     write_byte(m, '"');
 890 
 891     while (true) {
 892         int c = m->current;
 893         if (c == EOF) {
 894             fail(m, 1, "input ended with an object key");
 895         }
 896 
 897         write_byte(m, c);
 898         advance(m);
 899 
 900         c = m->current;
 901         if (!isalpha(c) && !isdigit(c) && c != '_') {
 902             break;
 903         }
 904     }
 905 
 906     write_byte(m, '"');
 907 }
 908 
 909 void handle_object(j2_maker* m) {
 910     size_t items = 0;
 911     m->level++;
 912     write_byte(m, '{');
 913     advance(m);
 914 
 915     while (true) {
 916         seek_token(m);
 917         int lead = m->current;
 918 
 919         if (lead == EOF) {
 920             fail(m, 1, "unclosed object");
 921         }
 922 
 923         if (lead == ',') {
 924             advance(m);
 925             continue;
 926         }
 927 
 928         if (lead == '}') {
 929             m->level--;
 930             if (items > 0) {
 931                 write_byte(m, '\n');
 932                 indent(m);
 933             }
 934             write_byte(m, '}');
 935             advance(m);
 936             return;
 937         }
 938 
 939         if (feof(m->out)) {
 940             return;
 941         }
 942 
 943         if (lead == '"' || lead == '\'') {
 944             if (items > 0) {
 945                 write_byte(m, ',');
 946             }
 947             write_byte(m, '\n');
 948             indent(m);
 949             handle_string(m);
 950         } else if (isalpha(lead) || lead == '_') {
 951             if (items > 0) {
 952                 write_byte(m, ',');
 953             }
 954             write_byte(m, '\n');
 955             indent(m);
 956             handle_unquoted_key(m);
 957         } else {
 958             fail(m, 1, "only strings or identifiers can be object keys");
 959         }
 960 
 961         seek_token(m);
 962         lead = m->current;
 963 
 964         if (lead == EOF) {
 965             fail(m, 1, "input ended after object-key and before value");
 966         }
 967 
 968         if (lead != ':') {
 969             fail(m, 1, "a `:` must follow all object keys");
 970         }
 971 
 972         write_byte(m, ':');
 973         write_byte(m, ' ');
 974         advance(m);
 975 
 976         seek_token(m);
 977         if (m->current == EOF) {
 978             fail(m, 1, "input ended after a `:` following an object-key");
 979         }
 980 
 981         handle_token(m, 0);
 982         items++;
 983     }
 984 }
 985 
 986 // dispatch ties leading bytes/chars in tokens to the funcs which handle them
 987 void (*dispatch[256])() = {
 988     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 989     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 990     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 991     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 992     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 993     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 994     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 995     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 996     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 997     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 998     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 999     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1000     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1001     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1002     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1003     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1004     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1005     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1006     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1007     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1008     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1009     NULL, NULL, NULL, NULL,
1010 };
1011 
1012 void handle_token(j2_maker* m, ssize_t lead_level) {
1013     write_spaces(m, 2 * lead_level);
1014     dispatch[m->current](m);
1015 }
1016 
1017 // handle_invalid_token shows an error message and quits the app right after
1018 void handle_invalid_token(j2_maker* m) {
1019     char msg[64];
1020     unsigned char c = (unsigned char)m->current;
1021     sprintf(msg, "%c (%d): invalid token", c, c);
1022     fail(m, 1, msg);
1023 }
1024 
1025 void handle_input(FILE* src) {
1026     unsigned char ibuf[IBUF_SIZE];
1027     unsigned char obuf[OBUF_SIZE];
1028 
1029     j2_maker m;
1030     m.ibuf = ibuf;
1031     m.icap = sizeof(ibuf);
1032     m.obuf = obuf;
1033     m.ocap = sizeof(obuf);
1034     restart_state(&m, stdout, src);
1035 
1036     // ignore leading whitespace/comment bytes, if present
1037     seek_token(&m);
1038 
1039     if (m.current == EOF) {
1040         fail(&m, 1, "empty input isn't valid JSON");
1041     }
1042 
1043     handle_token(&m, 0);
1044     write_byte(&m, '\n');
1045     flush(&m);
1046 
1047     // ignore trailing whitespace/comment bytes, if present
1048     seek_token(&m);
1049 
1050     // ignore trailing semicolon, if present
1051     if (m.current == ';') {
1052         advance(&m);
1053         // ignore trailing whitespace/comment bytes, if present
1054         seek_token(&m);
1055     }
1056 
1057     if (!feof(src) || m.current != EOF) {
1058         fail(&m, 1, "unexpected trailing JSON data");
1059     }
1060 }
1061 
// is_help_option checks if the string given is one of the help options,
// which can start with either a single or a double dash
bool is_help_option(const char* s) {
    static const char* const options[] = {"-h", "--h", "-help", "--help"};

    for (size_t i = 0; i < sizeof(options) / sizeof(options[0]); i++) {
        if (strcmp(s, options[i]) == 0) {
            return true;
        }
    }
    return false;
}
1070 
// run handles the input named by the arguments given, which exclude the
// app's own name: the standard input is used when there's no name, or when
// the name is either empty or a single dash. A leading `--` argument is
// ignored. The result is the error code, which is 0 on success.
int run(int nargs, char** args) {
    if (nargs > 0 && strcmp(args[0], "--") == 0) {
        nargs--;
        args++;
    }

    if (nargs > 1) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, ERROR_LINE("%s"), msg);
        return 1;
    }

    // use stdin when not given a filepath
    const bool use_stdin = nargs == 0 ||
        strcmp(args[0], "") == 0 ||
        strcmp(args[0], "-") == 0;

    FILE* src = stdin;
    if (!use_stdin) {
        src = fopen(args[0], "rb");
        if (src == NULL) {
            fprintf(stderr, ERROR_LINE("can't open file named '%s'"), args[0]);
            return 1;
        }
    }

    handle_input(src);

    // only files opened here are closed here
    if (!use_stdin) {
        fclose(src);
    }
    return 0;
}
1102 
1103 int main(int argc, char** argv) {
1104 #ifdef _WIN32
1105     setmode(fileno(stdin), O_BINARY);
1106     // ensure output lines end in LF instead of CRLF on windows
1107     setmode(fileno(stdout), O_BINARY);
1108     setmode(fileno(stderr), O_BINARY);
1109 #endif
1110 
1111     if (argc > 1 && is_help_option(argv[1])) {
1112         printf("%s", info);
1113         return 0;
1114     }
1115 
1116     // the dispatch table starts as all null function-pointers
1117     for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
1118         dispatch[i] = handle_invalid_token;
1119     }
1120 
1121     for (size_t i = '0'; i <= '9'; i++) {
1122         dispatch[i] = handle_number;
1123     }
1124 
1125     dispatch['n'] = handle_null;
1126     dispatch['t'] = handle_true;
1127     dispatch['f'] = handle_false;
1128     dispatch['N'] = handle_capital_none;
1129     dispatch['T'] = handle_capital_true;
1130     dispatch['F'] = handle_capital_false;
1131     dispatch['.'] = handle_dot;
1132     dispatch['+'] = handle_plus_number;
1133     dispatch['-'] = handle_minus_number;
1134     dispatch['"'] = handle_string;
1135     dispatch['\''] = handle_string;
1136     dispatch['['] = handle_array;
1137     dispatch['('] = handle_array;
1138     dispatch['{'] = handle_object;
1139 
1140     // enable full/block-buffering for standard output
1141     setvbuf(stdout, NULL, _IOFBF, 0);
1142 
1143     return run(argc - 1, argv + 1) == 0 ? 0 : 1;
1144 }