File: json2.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./json2 ./json2.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdarg.h>
  33 #include <stdbool.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #ifdef RED_ERRORS
  45 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  46 #ifdef __APPLE__
  47 #define ERROR_STYLE "\x1b[31m"
  48 #endif
  49 #define RESET_STYLE "\x1b[0m"
  50 #else
  51 #define ERROR_STYLE
  52 #define RESET_STYLE
  53 #endif
  54 
  55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  56 
// IBUF_SIZE is how many bytes the input buffer declared in func handle_input
// has: it can be overridden when building
#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

// OBUF_SIZE is how many bytes the output buffer declared in func handle_input
// has: it can be overridden when building
#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif
  64 
// info is the help message func main prints to standard output when the
// first command-line argument is one of the help options
const char* info = ""
"json2 [options...] [file...]\n"
"\n"
"\n"
"JSON-2 converts/fixes JSON/pseudo-JSON input into indented multi-line JSON\n"
"which uses 2 spaces for each indentation level.\n"
"\n"
"Besides formatting JSON, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
"    - ignores both rest-of-line and multi-line comments\n"
"    - ignores extra/trailing commas in arrays and objects\n"
"    - turns single-quoted strings/keys into double-quoted strings\n"
"    - double-quotes unquoted object keys\n"
"    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
"    -h        show this help message\n"
"    -help     show this help message\n"
"";
  86 
// j2_maker has all the state needed to turn input bytes into indented JSON:
// the input/output streams and their buffers, the line/position used for
// error messages, the current nesting level, and a 1-byte lookahead
typedef struct j2_maker {
    FILE* in;  // where input bytes are read from
    FILE* out; // where output bytes are written to

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    unsigned char* obuf;
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    // NOTE(review): ssize_t is a POSIX type, and <sys/types.h> isn't among
    // the headers this file includes; presumably it comes in indirectly via
    // other headers on the supported platforms - confirm
    ssize_t level; // the current indentation/nesting level

    int current; // the byte being handled, or EOF
    int next;    // the byte following the current one, or EOF
} j2_maker;
 108 
 109 // advance_reader_pos helps func read_byte do its job
 110 static inline void advance_reader_pos(j2_maker* r, unsigned char b) {
 111     r->ipos++;
 112     if (b == '\n') {
 113         r->line++;
 114         r->pos = 1;
 115     } else {
 116         r->pos++;
 117     }
 118 }
 119 
 120 // read_byte does as it says: check its return for the value EOF, before
 121 // using it as the next byte
 122 static inline int read_byte(j2_maker* r) {
 123     if (r->ipos < r->ilen) {
 124         // inside current chunk
 125         const unsigned char b = r->ibuf[r->ipos];
 126         advance_reader_pos(r, b);
 127         return b;
 128     }
 129 
 130     // need to read the next block
 131     r->ipos = 0;
 132     r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
 133     if (r->ilen > 0) {
 134         const unsigned char b = r->ibuf[r->ipos];
 135         advance_reader_pos(r, b);
 136         return b;
 137     }
 138 
 139     // reached the end of data
 140     return EOF;
 141 }
 142 
 143 // advance is used in most of the code, instead of calling read_byte directly
 144 static inline void advance(j2_maker* r) {
 145     r->current = r->next;
 146     r->next = read_byte(r);
 147 }
 148 
 149 void fail(j2_maker* m, int code, const char* msg);
 150 
 151 void skip_line(j2_maker* r) {
 152     while (true) {
 153         advance(r);
 154         const int lead = r->current;
 155 
 156         if (lead == EOF) {
 157             break;
 158         }
 159 
 160         if (lead == '\n') {
 161             advance(r);
 162             break;
 163         }
 164     }
 165 }
 166 
 167 void skip_multiline_comment(j2_maker* r) {
 168     unsigned char prev = 0;
 169 
 170     while (true) {
 171         advance(r);
 172         const int lead = r->current;
 173 
 174         if (lead == EOF) {
 175             break;
 176         }
 177 
 178         if (prev == '*' && lead == '/') {
 179             advance(r);
 180             break;
 181         }
 182 
 183         prev = (unsigned char)lead;
 184     }
 185 }
 186 
 187 void skip_comment(j2_maker* r) {
 188     int lead = r->current;
 189 
 190     if (lead == '#') {
 191         skip_line(r);
 192         return;
 193     }
 194 
 195     if (lead != '/') {
 196         fail(r, 1, "expected a slash to start comments");
 197     }
 198 
 199     advance(r);
 200     lead = r->current;
 201 
 202     if (lead == '/') {
 203         skip_line(r);
 204         return;
 205     }
 206 
 207     if (lead == '*') {
 208         skip_multiline_comment(r);
 209         return;
 210     }
 211 
 212     fail(r, 1, "expected `//` or `/*` to start comments");
 213 }
 214 
 215 static inline void seek_token(j2_maker* r) {
 216     while (true) {
 217         const int lead = r->current;
 218 
 219         if (lead != EOF && lead <= ' ') {
 220             advance(r);
 221             continue;
 222         }
 223 
 224         if (lead == '/' || lead == '#') {
 225             skip_comment(r);
 226             continue;
 227         }
 228 
 229         break;
 230     }
 231 }
 232 
 233 bool starts_with_bom(const unsigned char* b, const size_t n) {
 234     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
 235 }
 236 
 237 void restart_state(j2_maker* m, FILE* w, FILE* r) {
 238     m->in = r;
 239     m->ilen = 0;
 240     m->ipos = 0;
 241 
 242     m->out = w;
 243     m->opos = 0;
 244 
 245     m->line = 1;
 246     m->pos = 1;
 247 
 248     m->current = EOF;
 249     m->next = EOF;
 250 
 251     m->current = read_byte(m);
 252     if (m->current == EOF) {
 253         return;
 254     }
 255     m->next = read_byte(m);
 256 
 257     m->level = 0;
 258 
 259     // skip leading UTF-8 BOM (byte-order mark), if present
 260     if (starts_with_bom(m->ibuf, m->ilen)) {
 261         // a UTF-8 BOM has 3 bytes
 262         for (size_t i = 0; i < 3 && m->current != EOF; i++) {
 263             advance(m);
 264         }
 265     }
 266 }
 267 
 268 void write_byte(j2_maker* m, unsigned char b) {
 269     if (m->opos < m->ocap) {
 270         m->obuf[m->opos++] = b;
 271         return;
 272     }
 273 
 274     fwrite(m->obuf, 1, m->ocap, m->out);
 275     m->obuf[0] = b;
 276     m->opos = 1;
 277 }
 278 
 279 // write_bytes does as it says, minimizing the number of calls to fwrite
 280 void write_bytes(j2_maker* m, const unsigned char* src, size_t len) {
 281     const size_t rem = m->ocap - m->opos;
 282     if (len < rem) {
 283         memcpy(m->obuf + m->opos, src, len);
 284         m->opos += len;
 285         return;
 286     }
 287 
 288     for (size_t i = 0; i < len; i++) {
 289         write_byte(m, src[i]);
 290     }
 291 }
 292 
 293 void write_spaces(j2_maker* m, ssize_t n) {
 294     const unsigned char spaces[32] = "                                ";
 295     while (n > sizeof(spaces)) {
 296         write_bytes(m, spaces, sizeof(spaces));
 297         n -= sizeof(spaces);
 298     }
 299     if (n > 0) {
 300         write_bytes(m, spaces, n);
 301     }
 302 }
 303 
 304 static inline void indent(j2_maker* m) {
 305     write_spaces(m, 2 * m->level);
 306 }
 307 
 308 void flush(j2_maker* m) {
 309     if (m->opos > 0) {
 310         fwrite(m->obuf, 1, m->opos, m->out);
 311     }
 312     m->opos = 0;
 313     fflush(m->out);
 314 }
 315 
 316 // https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
 317 
 318 static inline bool check_2_byte_rune(int a, int b) {
 319     return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
 320 }
 321 
 322 bool check_3_byte_rune(int a, int b, int c) {
 323     return (
 324         (a == 0xe0) &&
 325         (0xa0 <= b && b <= 0xbf) &&
 326         (0x80 <= c && c <= 0xbf)
 327     ) || (
 328         (0xe1 <= a && a <= 0xec) &&
 329         (0x80 <= b && b <= 0xbf) &&
 330         (0x80 <= c && c <= 0xbf)
 331     ) || (
 332         (a == 0xed) &&
 333         (0x80 <= b && b <= 0x9f) &&
 334         (0x80 <= c && c <= 0xbf)
 335     ) || (
 336         (a == 0xee || a == 0xef) &&
 337         (0x80 <= b && b <= 0xbf) &&
 338         (0x80 <= c && c <= 0xbf)
 339     );
 340 }
 341 
 342 bool check_4_byte_rune(int a, int b, int c, int d) {
 343     return (
 344         (a == 0xf0) &&
 345         (0x90 <= b && b <= 0xbf) &&
 346         (0x80 <= c && c <= 0xbf) &&
 347         (0x80 <= d && d <= 0xbf)
 348     ) || (
 349         (a == 0xf1 || a == 0xf3) &&
 350         (0x80 <= b && b <= 0xbf) &&
 351         (0x80 <= c && c <= 0xbf) &&
 352         (0x80 <= d && d <= 0xbf)
 353     ) || (
 354         (a == 0xf4) &&
 355         (0x80 <= b && b <= 0xbf) &&
 356         (0x80 <= c && c <= 0x8f) &&
 357         (0x80 <= d && d <= 0xbf)
 358     );
 359 }
 360 
 361 // write_replacement_char is the recommended action to handle invalid bytes
 362 void write_replacement_char(j2_maker* m) {
 363     write_byte(m, 0xef);
 364     write_byte(m, 0xbf);
 365     write_byte(m, 0xbd);
 366 }
 367 
 368 void handle_invalid_rune(j2_maker* m) {
 369     // fail(m, 1, "invalid unicode value");
 370     write_replacement_char(m);
 371 }
 372 
 373 // write_rune is following the table at https://en.wikipedia.org/wiki/UTF-8
 374 void write_rune(j2_maker* m, uint32_t rune) {
 375     if (rune < (1 << 7)) {
 376         write_byte(m, rune);
 377         return;
 378     }
 379 
 380     if (rune < (1 << (5 + 6))) {
 381         const int a = 0b11000000 | (rune >> 6);
 382         const int b = 0b10000000 | (rune & 0b00111111);
 383         if (check_2_byte_rune(a, b)) {
 384             write_byte(m, a);
 385             write_byte(m, b);
 386         } else {
 387             write_replacement_char(m);
 388         }
 389         return;
 390     }
 391 
 392     if (rune < (1 << (4 + 6 + 6))) {
 393         const int a = 0b11100000 | (rune >> 12);
 394         const int b = 0b10000000 | ((rune >> 6) & 0b00111111);
 395         const int c = 0b10000000 | (rune & 0b00111111);
 396         if (check_3_byte_rune(a, b, c)) {
 397             write_byte(m, a);
 398             write_byte(m, b);
 399             write_byte(m, c);
 400         } else {
 401             write_replacement_char(m);
 402         }
 403         return;
 404     }
 405 
 406     if (rune < (1 << (3 + 6 + 6 + 6))) {
 407         const int a = 0b11110000 | (rune >> 18);
 408         const int b = 0b10000000 | ((rune >> 12) & 0b00111111);
 409         const int c = 0b10000000 | ((rune >> 6) & 0b00111111);
 410         const int d = 0b10000000 | (rune & 0b00111111);
 411         if (check_4_byte_rune(a, b, c, d)) {
 412             write_byte(m, a);
 413             write_byte(m, b);
 414             write_byte(m, c);
 415             write_byte(m, d);
 416         } else {
 417             write_replacement_char(m);
 418         }
 419         return;
 420     }
 421 
 422     write_replacement_char(m);
 423 }
 424 
 425 void copy_utf8_rune(j2_maker* m) {
 426     const int a = m->current;
 427 
 428     if (a == EOF) {
 429         return;
 430     }
 431 
 432     // handle 1-byte runes
 433     if (a < 128) {
 434         write_byte(m, a);
 435         return;
 436     }
 437 
 438     advance(m);
 439     const int b = m->current;
 440 
 441     if (b == EOF) {
 442         handle_invalid_rune(m);
 443         return;
 444     }
 445 
 446     // handle 2-byte runes
 447     if (check_2_byte_rune(a, b)) {
 448         write_byte(m, a);
 449         write_byte(m, b);
 450         return;
 451     }
 452 
 453     advance(m);
 454     const int c = m->current;
 455 
 456     if (c == EOF) {
 457         handle_invalid_rune(m);
 458         return;
 459     }
 460 
 461     // handle 3-byte runes
 462     if (check_3_byte_rune(a, b, c)) {
 463         write_byte(m, a);
 464         write_byte(m, b);
 465         write_byte(m, c);
 466         return;
 467     }
 468 
 469     advance(m);
 470     const int d = m->current;
 471 
 472     if (d == EOF) {
 473         handle_invalid_rune(m);
 474         return;
 475     }
 476 
 477     // handle 4-byte runes
 478     if (check_4_byte_rune(a, b, c, d)) {
 479         write_byte(m, a);
 480         write_byte(m, b);
 481         write_byte(m, c);
 482         write_byte(m, d);
 483         return;
 484     }
 485 
 486     handle_invalid_rune(m);
 487 }
 488 
 489 // debug is available to diagnose any bug found
 490 void debug(j2_maker* m, const char* fmt, ...) {
 491     va_list args;
 492     va_start(args, fmt);
 493 
 494     if (m->in != stdin) {
 495         fclose(m->in);
 496     }
 497 
 498     write_byte(m, '\n');
 499 
 500     const unsigned long line = m->line;
 501     const unsigned long pos = m->pos;
 502     fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
 503     fprintf(stderr, fmt, args);
 504     fprintf(stderr, "\x1b[0m\n");
 505 
 506     va_end(args);
 507 
 508     exit(10);
 509 }
 510 
 511 // fail quits this app right after showing the error message given
 512 void fail(j2_maker* m, int code, const char* msg) {
 513     const unsigned long line = m->line;
 514     const unsigned long pos = m->pos;
 515 
 516     write_byte(m, '\n');
 517     flush(m);
 518     fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
 519     exit(code);
 520 }
 521 
 522 bool demand_keyword(j2_maker* m, char* rest) {
 523     for (; rest[0] != 0; rest++) {
 524         const int lead = m->current;
 525         if (lead == EOF || lead != rest[0]) {
 526             return false;
 527         }
 528         advance(m);
 529     }
 530 
 531     return rest[0] == 0;
 532 }
 533 
 534 void handle_null(j2_maker* m) {
 535     if (!demand_keyword(m, "null")) {
 536         fail(m, 1, "expected `null` keyword");
 537     }
 538     write_bytes(m, (unsigned char*)"null", 4);
 539 }
 540 
 541 void handle_true(j2_maker* m) {
 542     if (!demand_keyword(m, "true")) {
 543         fail(m, 1, "expected `true` keyword");
 544     }
 545     write_bytes(m, (unsigned char*)"true", 4);
 546 }
 547 
 548 void handle_false(j2_maker* m) {
 549     if (!demand_keyword(m, "false")) {
 550         fail(m, 1, "expected `false` keyword");
 551     }
 552     write_bytes(m, (unsigned char*)"false", 5);
 553 }
 554 
 555 void handle_capital_none(j2_maker* m) {
 556     if (!demand_keyword(m, "None")) {
 557         fail(m, 1, "expected `None` keyword");
 558     }
 559     write_bytes(m, (unsigned char*)"null", 4);
 560 }
 561 
 562 void handle_capital_true(j2_maker* m) {
 563     if (!demand_keyword(m, "True")) {
 564         fail(m, 1, "expected `True` keyword");
 565     }
 566     write_bytes(m, (unsigned char*)"true", 4);
 567 }
 568 
 569 void handle_capital_false(j2_maker* m) {
 570     if (!demand_keyword(m, "False")) {
 571         fail(m, 1, "expected `False` keyword");
 572     }
 573     write_bytes(m, (unsigned char*)"false", 5);
 574 }
 575 
 576 void handle_digits(j2_maker* m) {
 577     if (!isdigit(m->current)) {
 578         fail(m, 1, "expected/missing digits");
 579     }
 580 
 581     while (isdigit(m->current)) {
 582         write_byte(m, m->current);
 583         advance(m);
 584     }
 585 }
 586 
 587 void handle_number(j2_maker* m) {
 588     handle_digits(m);
 589 
 590     const int lead = m->current;
 591 
 592     if (lead == '.') {
 593         write_byte(m, '.');
 594         advance(m);
 595 
 596         if (isdigit(m->current)) {
 597             handle_digits(m);
 598         } else {
 599             write_byte(m, '0');
 600         }
 601         return;
 602     }
 603 
 604     if (lead == 'e' || lead == 'E') {
 605         write_byte(m, lead);
 606         advance(m);
 607 
 608         if (m->current == '+') {
 609             advance(m);
 610         } else if (m->current == '-') {
 611             write_byte(m, '-');
 612             advance(m);
 613         }
 614 
 615         handle_digits(m);
 616     }
 617 }
 618 
 619 void handle_dot(j2_maker* m) {
 620     write_byte(m, '0');
 621     write_byte(m, '.');
 622     advance(m);
 623 
 624     if (!isdigit(m->current)) {
 625         fail(m, 1, "expected/missing digits after decimal dot");
 626     }
 627     handle_digits(m);
 628 }
 629 
 630 void handle_plus_number(j2_maker* m) {
 631     advance(m);
 632 
 633     if (m->current == '.') {
 634         handle_dot(m);
 635         return;
 636     }
 637     handle_number(m);
 638 }
 639 
 640 void handle_minus_number(j2_maker* m) {
 641     write_byte(m, '-');
 642     advance(m);
 643 
 644     if (m->current == '.') {
 645         handle_dot(m);
 646         return;
 647     }
 648     handle_number(m);
 649 }
 650 
 651 // decode_hex assumes valid hex digits, checked by func is_valid_hex
 652 uint32_t decode_hex(unsigned char hex) {
 653     if ('0' <= hex && hex <= '9') {
 654         return hex - '0';
 655     }
 656     if ('A' <= hex && hex <= 'F') {
 657         return hex - 'A' + 10;
 658     }
 659     if ('a' <= hex && hex <= 'f') {
 660         return hex - 'a' + 10;
 661     }
 662     return 0xffff;
 663 }
 664 
 665 static inline bool is_valid_hex(unsigned char b) {
 666     return false ||
 667         ('0' <= b && b <= '9') ||
 668         ('A' <= b && b <= 'F') ||
 669         ('a' <= b && b <= 'f');
 670 }
 671 
 672 // handle_low_char ensures characters whose ASCII codes are lower than spaces
 673 // are properly escaped for strings
 674 void handle_low_char(j2_maker* m, int c) {
 675     const char* hex = "0123456789ABCDEF";
 676 
 677     switch (c) {
 678     case '\t':
 679         write_byte(m, '\\');
 680         write_byte(m, 't');
 681         break;
 682     case '\n':
 683         write_byte(m, '\\');
 684         write_byte(m, 'n');
 685         break;
 686     case '\r':
 687         write_byte(m, '\\');
 688         write_byte(m, 'r');
 689         break;
 690     case '\b':
 691         write_byte(m, '\\');
 692         write_byte(m, 'b');
 693         break;
 694     case '\f':
 695         write_byte(m, '\\');
 696         write_byte(m, 'f');
 697         break;
 698     case '\v':
 699         write_byte(m, '\\');
 700         write_byte(m, 'v');
 701         break;
 702     default:
 703         write_byte(m, '\\');
 704         write_byte(m, 'u');
 705         write_byte(m, '0');
 706         write_byte(m, '0');
 707         write_byte(m, hex[c / 16]);
 708         write_byte(m, hex[c % 16]);
 709         break;
 710     }
 711 }
 712 
 713 void write_inner_string_hex_quad(j2_maker* m, const unsigned char quad[4]) {
 714     const uint32_t n = 0 +
 715         (decode_hex(quad[0]) << 12) +
 716         (decode_hex(quad[1]) << 8) +
 717         (decode_hex(quad[2]) << 4) +
 718         (decode_hex(quad[3]) << 0);
 719 
 720     switch (n) {
 721     case '"':
 722         write_byte(m, '\\');
 723         write_byte(m, '"');
 724         return;
 725     case '\\':
 726         write_byte(m, '\\');
 727         write_byte(m, '\\');
 728         return;
 729     }
 730 
 731     if (n >= ' ') {
 732         write_rune(m, n);
 733     } else {
 734         handle_low_char(m, n);
 735     }
 736 }
 737 
 738 void handle_hex_quad(j2_maker* m) {
 739     unsigned char quad[4];
 740     for (size_t i = 0; i < 4; i++) {
 741         advance(m);
 742         const int lead = m->current;
 743         if (lead == EOF) {
 744             fail(m, 1, "end of input before end of string");
 745         }
 746         if (is_valid_hex(lead)) {
 747             quad[i] = lead;
 748             continue;
 749         }
 750         fail(m, 1, "invalid hexadecimal digit in string");
 751     }
 752 
 753     write_inner_string_hex_quad(m, quad);
 754 }
 755 
 756 void handle_hex_pair(j2_maker* m) {
 757     unsigned char quad[4] = {'0', '0', '0', '0'};
 758     advance(m);
 759     const int a = m->current;
 760     advance(m);
 761     const int b = m->current;
 762     if (a == EOF || b == EOF) {
 763         fail(m, 1, "end of input before end of string");
 764     }
 765     if (!is_valid_hex(a) || !is_valid_hex(b)) {
 766         fail(m, 1, "invalid hexadecimal digit in string");
 767     }
 768 
 769     quad[2] = a;
 770     quad[3] = b;
 771     write_inner_string_hex_quad(m, quad);
 772 }
 773 
 774 void handle_string_escape(j2_maker* m, int c) {
 775     switch (c) {
 776     case '"':
 777     case '\\':
 778     case 'b':
 779     case 'f':
 780     case 'n':
 781     case 'r':
 782     case 't':
 783         write_byte(m, '\\');
 784         write_byte(m, c);
 785         break;
 786     case 'u':
 787         handle_hex_quad(m);
 788         break;
 789     case 'x':
 790         handle_hex_pair(m);
 791         break;
 792     case '\'':
 793         write_byte(m, '\'');
 794         break;
 795     default:
 796         write_byte(m, m->current);
 797         break;
 798     }
 799 }
 800 
 801 void handle_string(j2_maker* m) {
 802     const unsigned char quote = m->current;
 803     bool escaped = false;
 804 
 805     write_byte(m, '"');
 806 
 807     while (true) {
 808         advance(m);
 809 
 810         int c = m->current;
 811         if (c == EOF) {
 812             fail(m, 1, "input ended before string was close-quoted");
 813         }
 814 
 815         if (escaped) {
 816             handle_string_escape(m, c);
 817             escaped = false;
 818             continue;
 819         }
 820 
 821         switch (c) {
 822         case '\\':
 823             escaped = true;
 824             break;
 825         default:
 826             if (c == quote) {
 827                 write_byte(m, '"');
 828                 advance(m);
 829                 return;
 830             }
 831 
 832             // write_byte(m, c);
 833             if (c < ' ') {
 834                 handle_low_char(m, c);
 835             } else {
 836                 copy_utf8_rune(m);
 837             }
 838             break;
 839         }
 840     }
 841 }
 842 
 843 void handle_token(j2_maker* m, ssize_t lead_level);
 844 
 845 void handle_array(j2_maker* m) {
 846     m->level++;
 847     write_byte(m, '[');
 848     advance(m);
 849 
 850     for (size_t i = 0; true; i++) {
 851         seek_token(m);
 852         const int lead = m->current;
 853 
 854         if (lead == EOF) {
 855             fail(m, 1, "unclosed array");
 856         }
 857 
 858         if (lead == ',') {
 859             advance(m);
 860             continue;
 861         }
 862 
 863         if (lead == ']') {
 864             m->level--;
 865             if (i > 0) {
 866                 write_byte(m, '\n');
 867                 indent(m);
 868             }
 869             write_byte(m, ']');
 870             advance(m);
 871             return;
 872         }
 873 
 874         if (i > 0) {
 875             write_byte(m, ',');
 876         }
 877         write_byte(m, '\n');
 878         if (feof(m->out)) {
 879             return;
 880         }
 881         handle_token(m, m->level);
 882     }
 883 }
 884 
 885 void handle_unquoted_key(j2_maker* m) {
 886     write_byte(m, '"');
 887 
 888     while (true) {
 889         int c = m->current;
 890         if (c == EOF) {
 891             fail(m, 1, "input ended with an object key");
 892         }
 893 
 894         write_byte(m, c);
 895         advance(m);
 896 
 897         c = m->current;
 898         if (!isalpha(c) && !isdigit(c) && c != '_') {
 899             break;
 900         }
 901     }
 902 
 903     write_byte(m, '"');
 904 }
 905 
 906 void handle_object(j2_maker* m) {
 907     m->level++;
 908     write_byte(m, '{');
 909     advance(m);
 910 
 911     for (size_t i = 0; true; i++) {
 912         seek_token(m);
 913         int lead = m->current;
 914 
 915         if (lead == EOF) {
 916             fail(m, 1, "unclosed object");
 917         }
 918 
 919         if (lead == ',') {
 920             advance(m);
 921             continue;
 922         }
 923 
 924         if (lead == '}') {
 925             m->level--;
 926             if (i > 0) {
 927                 write_byte(m, '\n');
 928                 indent(m);
 929             }
 930             write_byte(m, '}');
 931             advance(m);
 932             return;
 933         }
 934 
 935         if (feof(m->out)) {
 936             return;
 937         }
 938 
 939         if (lead == '"' || lead == '\'') {
 940             if (i > 0) {
 941                 write_byte(m, ',');
 942             }
 943             write_byte(m, '\n');
 944             indent(m);
 945             handle_string(m);
 946         } else if (isalpha(lead) || lead == '_') {
 947             if (i > 0) {
 948                 write_byte(m, ',');
 949             }
 950             write_byte(m, '\n');
 951             indent(m);
 952             handle_unquoted_key(m);
 953         } else {
 954             fail(m, 1, "only strings or identifiers can be object keys");
 955         }
 956 
 957         seek_token(m);
 958         lead = m->current;
 959 
 960         if (lead == EOF) {
 961             fail(m, 1, "input ended after object-key and before value");
 962         }
 963 
 964         if (lead != ':') {
 965             fail(m, 1, "a `:` must follow all object keys");
 966         }
 967 
 968         write_byte(m, ':');
 969         write_byte(m, ' ');
 970         advance(m);
 971 
 972         seek_token(m);
 973         if (m->current == EOF) {
 974             fail(m, 1, "input ended after a `:` following an object-key");
 975         }
 976 
 977         handle_token(m, 0);
 978     }
 979 }
 980 
 981 // dispatch ties leading bytes/chars in tokens to the funcs which handle them
 982 void (*dispatch[256])() = {
 983     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 984     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 985     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 986     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 987     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 988     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 989     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 990     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 991     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 992     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 993     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 994     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 995     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 996     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 997     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 998     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 999     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1000     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1001     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1002     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1003     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1004     NULL, NULL, NULL, NULL,
1005 };
1006 
1007 void handle_token(j2_maker* m, ssize_t lead_level) {
1008     write_spaces(m, 2 * lead_level);
1009     dispatch[m->current](m);
1010 }
1011 
1012 // handle_invalid_token shows an error message and quits the app right after
1013 void handle_invalid_token(j2_maker* m) {
1014     char msg[64];
1015     unsigned char c = (unsigned char)m->current;
1016     sprintf(msg, "%c (%d): invalid token", c, c);
1017     fail(m, 1, msg);
1018 }
1019 
1020 void handle_input(FILE* src) {
1021     unsigned char ibuf[IBUF_SIZE];
1022     unsigned char obuf[OBUF_SIZE];
1023 
1024     j2_maker m;
1025     m.ibuf = ibuf;
1026     m.icap = sizeof(ibuf);
1027     m.obuf = obuf;
1028     m.ocap = sizeof(obuf);
1029     restart_state(&m, stdout, src);
1030 
1031     // ignore leading whitespace/comment bytes, if present
1032     seek_token(&m);
1033 
1034     if (m.current == EOF) {
1035         fail(&m, 1, "empty input isn't valid JSON");
1036     }
1037 
1038     handle_token(&m, 0);
1039     write_byte(&m, '\n');
1040     flush(&m);
1041 
1042     // ignore trailing whitespace/comment bytes, if present
1043     seek_token(&m);
1044 
1045     // ignore trailing semicolon, if present
1046     if (m.current == ';') {
1047         advance(&m);
1048         // ignore trailing whitespace/comment bytes, if present
1049         seek_token(&m);
1050     }
1051 
1052     if (!feof(src) || m.current != EOF) {
1053         fail(&m, 1, "unexpected trailing JSON data");
1054     }
1055 }
1056 
// is_help_option checks if the string given is any of the help options,
// which can start either with a single dash or with a double-dash
bool is_help_option(const char* s) {
    static const char* const names[] = {"-h", "--h", "-help", "--help"};

    if (s[0] != '-' || s[1] == 0) {
        return false;
    }

    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (strcmp(s, names[i]) == 0) {
            return true;
        }
    }
    return false;
}
1065 
// run handles the input the command-line arguments given stand for, after an
// optional leading `--` argument: no arguments, an empty name, or a single
// dash all mean standard input, while anything else is a filepath; the
// result is the app's error code, which is 0 on success
int run(int nargs, char** args) {
    if (nargs > 0 && strcmp(args[0], "--") == 0) {
        args++;
        nargs--;
    }

    if (nargs > 1) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, ERROR_LINE("%s"), msg);
        return 1;
    }

    // a missing filepath means the same as a single dash
    const char* path = (nargs > 0) ? args[0] : "-";

    // use stdin when not given a filepath
    const bool use_stdin = (path[0] == 0) || (strcmp(path, "-") == 0);
    if (use_stdin) {
        handle_input(stdin);
        return 0;
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return 1;
    }

    handle_input(f);
    fclose(f);

    return 0;
}
1097 
1098 int main(int argc, char** argv) {
1099 #ifdef _WIN32
1100     setmode(fileno(stdin), O_BINARY);
1101     // ensure output lines end in LF instead of CRLF on windows
1102     setmode(fileno(stdout), O_BINARY);
1103     setmode(fileno(stderr), O_BINARY);
1104 #endif
1105 
1106     if (argc > 1 && is_help_option(argv[1])) {
1107         printf("%s", info);
1108         return 0;
1109     }
1110 
1111     // the dispatch table starts as all null function-pointers
1112     for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
1113         dispatch[i] = handle_invalid_token;
1114     }
1115 
1116     for (size_t i = '0'; i <= '9'; i++) {
1117         dispatch[i] = handle_number;
1118     }
1119 
1120     dispatch['n'] = handle_null;
1121     dispatch['t'] = handle_true;
1122     dispatch['f'] = handle_false;
1123     dispatch['N'] = handle_capital_none;
1124     dispatch['T'] = handle_capital_true;
1125     dispatch['F'] = handle_capital_false;
1126     dispatch['.'] = handle_dot;
1127     dispatch['+'] = handle_plus_number;
1128     dispatch['-'] = handle_minus_number;
1129     dispatch['"'] = handle_string;
1130     dispatch['\''] = handle_string;
1131     dispatch['['] = handle_array;
1132     dispatch['{'] = handle_object;
1133 
1134     // enable full/block-buffering for standard output
1135     setvbuf(stdout, NULL, _IOFBF, 0);
1136 
1137     return run(argc - 1, argv + 1) == 0 ? 0 : 1;
1138 }