File: j0.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./j0 ./j0.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdarg.h>
  33 #include <stdbool.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <fcntl.h>
  40 #include <windows.h>
  41 #endif
  42 
// info is the help message: func main prints it with puts, then quits, when
// the first command-line argument is one of the help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
"    - ignores both rest-of-line and multi-line comments\n"
"    - ignores extra/trailing commas in arrays and objects\n"
"    - turns single-quoted strings/keys into double-quoted strings\n"
"    - double-quotes unquoted object keys\n"
"    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
"    -h        show this help message\n"
"    -help     show this help message\n"
"    -jsonl    emit JSON Lines, when top-level value is an array\n"
"";
  65 
// j0_maker holds all the state needed to turn 1 input stream into JSON: the
// streams used, the chunk-buffer input is read through, the line/position
// counters used for error messages, and a 1-byte lookahead
typedef struct j0_maker {
    FILE* in;  // where input bytes are read from, a chunk at a time
    FILE* out; // where all output bytes are written to

    unsigned char* ibuf; // the input buffer, which is provided by the caller
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    // both of these are either a byte value or EOF: func advance moves the
    // value in `next` into `current`, then reads a new value into `next`
    int current;
    int next;
} j0_maker;
  81 
  82 // advance_reader_pos helps func read_byte do its job
  83 void advance_reader_pos(j0_maker* r, unsigned char b) {
  84     r->ipos++;
  85     if (b == '\n') {
  86         r->line++;
  87         r->pos = 1;
  88     } else {
  89         r->pos++;
  90     }
  91 }
  92 
  93 // read_byte does as it says: check its return for the value EOF, before
  94 // using it as the next byte
  95 int read_byte(j0_maker* r) {
  96     if (r->ipos < r->ilen) {
  97         // inside current chunk
  98         const unsigned char b = r->ibuf[r->ipos];
  99         advance_reader_pos(r, b);
 100         return b;
 101     }
 102 
 103     // need to read the next block
 104     r->ipos = 0;
 105     r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
 106     if (r->ilen > 0) {
 107         const unsigned char b = r->ibuf[r->ipos];
 108         advance_reader_pos(r, b);
 109         return b;
 110     }
 111 
 112     // reached the end of data
 113     return EOF;
 114 }
 115 
 116 // advance is used in most of the code, instead of calling read_byte directly
 117 void advance(j0_maker* r) {
 118     r->current = r->next;
 119     r->next = read_byte(r);
 120 }
 121 
 122 void fail(j0_maker* s, int code, const char* msg);
 123 
 124 void skip_line(j0_maker* r) {
 125     while (true) {
 126         advance(r);
 127         const int lead = r->current;
 128 
 129         if (lead == EOF) {
 130             break;
 131         }
 132 
 133         if (lead == '\n') {
 134             advance(r);
 135             break;
 136         }
 137     }
 138 }
 139 
 140 void skip_multiline_comment(j0_maker* r) {
 141     unsigned char prev = 0;
 142 
 143     while (true) {
 144         advance(r);
 145         const int lead = r->current;
 146 
 147         if (lead == EOF) {
 148             break;
 149         }
 150 
 151         if (prev == '*' && lead == '/') {
 152             advance(r);
 153             break;
 154         }
 155 
 156         prev = (unsigned char)lead;
 157     }
 158 }
 159 
 160 void skip_comment(j0_maker* r) {
 161     int lead = r->current;
 162 
 163     if (lead == '#') {
 164         skip_line(r);
 165         return;
 166     }
 167 
 168     if (lead != '/') {
 169         fail(r, 1, "expected a slash to start comments");
 170     }
 171 
 172     advance(r);
 173     lead = r->current;
 174 
 175     if (lead == '/') {
 176         skip_line(r);
 177         return;
 178     }
 179 
 180     if (lead == '*') {
 181         skip_multiline_comment(r);
 182         return;
 183     }
 184 
 185     fail(r, 1, "expected `//` or `/*` to start comments");
 186 }
 187 
 188 void seek_token(j0_maker* r) {
 189     while (true) {
 190         const int lead = r->current;
 191 
 192         if (lead != EOF && lead <= ' ') {
 193             advance(r);
 194             continue;
 195         }
 196 
 197         if (lead == '/' || lead == '#') {
 198             skip_comment(r);
 199             continue;
 200         }
 201 
 202         break;
 203     }
 204 }
 205 
 206 bool starts_with_bom(const unsigned char* b, const size_t n) {
 207     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
 208 }
 209 
 210 void restart_state(j0_maker* s, FILE* w, FILE* r) {
 211     s->in = r;
 212     s->ilen = 0;
 213     s->ipos = 0;
 214 
 215     s->out = w;
 216 
 217     s->line = 1;
 218     s->pos = 1;
 219 
 220     s->current = EOF;
 221     s->next = EOF;
 222 
 223     s->current = read_byte(s);
 224     if (s->current == EOF) {
 225         return;
 226     }
 227     s->next = read_byte(s);
 228 
 229     // skip leading UTF-8 BOM (byte-order mark), if present
 230     if (starts_with_bom(s->ibuf, s->ilen)) {
 231         // a UTF-8 BOM has 3 bytes
 232         for (size_t i = 0; i < 3 && s->current != EOF; i++) {
 233             advance(s);
 234         }
 235     }
 236 }
 237 
 238 // write_bytes does as it says, minimizing the number of calls to fwrite
 239 void write_bytes(j0_maker* w, const unsigned char* src, size_t len) {
 240     if (len > 0 && fwrite(src, len, 1, w->out) < 1) {
 241         if (feof(w->out)) {
 242             exit(0);
 243         }
 244 
 245         fail(w, 1, "failed to write more output");
 246     }
 247 }
 248 
 249 void write_byte(j0_maker* w, unsigned char b) {
 250     putc(b, w->out);
 251 }
 252 
 253 // https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
 254 
 255 bool check_2_byte_rune(int a, int b) {
 256     return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
 257 }
 258 
 259 bool check_3_byte_rune(int a, int b, int c) {
 260     return (
 261         (a == 0xe0) &&
 262         (0xa0 <= b && b <= 0xbf) &&
 263         (0x80 <= c && c <= 0xbf)
 264     ) || (
 265         (0xe1 <= a && a <= 0xec) &&
 266         (0x80 <= b && b <= 0xbf) &&
 267         (0x80 <= c && c <= 0xbf)
 268     ) || (
 269         (a == 0xed) &&
 270         (0x80 <= b && b <= 0x9f) &&
 271         (0x80 <= c && c <= 0xbf)
 272     ) || (
 273         (a == 0xee || a == 0xef) &&
 274         (0x80 <= b && b <= 0xbf) &&
 275         (0x80 <= c && c <= 0xbf)
 276     );
 277 }
 278 
 279 bool check_4_byte_rune(int a, int b, int c, int d) {
 280     return (
 281         (a == 0xf0) &&
 282         (0x90 <= b && b <= 0xbf) &&
 283         (0x80 <= c && c <= 0xbf) &&
 284         (0x80 <= d && d <= 0xbf)
 285     ) || (
 286         (a == 0xf1 || a == 0xf3) &&
 287         (0x80 <= b && b <= 0xbf) &&
 288         (0x80 <= c && c <= 0xbf) &&
 289         (0x80 <= d && d <= 0xbf)
 290     ) || (
 291         (a == 0xf4) &&
 292         (0x80 <= b && b <= 0xbf) &&
 293         (0x80 <= c && c <= 0x8f) &&
 294         (0x80 <= d && d <= 0xbf)
 295     );
 296 }
 297 
 298 // write_replacement_char is the recommended action to handle invalid bytes
 299 void write_replacement_char(FILE* w) {
 300     putc(0xef, w);
 301     putc(0xbf, w);
 302     putc(0xbd, w);
 303 }
 304 
 305 void handle_invalid_rune(j0_maker* m) {
 306     // fail(m, 1, "invalid unicode value");
 307     write_replacement_char(m->out);
 308 }
 309 
 310 void copy_utf8_rune(j0_maker* m) {
 311     FILE* w = m->out;
 312     const int a = m->current;
 313 
 314     if (a == EOF) {
 315         return;
 316     }
 317 
 318     // handle 1-byte runes
 319     if (a < 128) {
 320         putc(a, w);
 321         return;
 322     }
 323 
 324     advance(m);
 325     const int b = m->current;
 326 
 327     if (b == EOF) {
 328         handle_invalid_rune(m);
 329         return;
 330     }
 331 
 332     // handle 2-byte runes
 333     if (check_2_byte_rune(a, b)) {
 334         putc(a, w);
 335         putc(b, w);
 336         return;
 337     }
 338 
 339     advance(m);
 340     const int c = m->current;
 341 
 342     if (c == EOF) {
 343         handle_invalid_rune(m);
 344         return;
 345     }
 346 
 347     // handle 3-byte runes
 348     if (check_3_byte_rune(a, b, c)) {
 349         putc(a, w);
 350         putc(b, w);
 351         putc(c, w);
 352         return;
 353     }
 354 
 355     advance(m);
 356     const int d = m->current;
 357 
 358     if (d == EOF) {
 359         handle_invalid_rune(m);
 360         return;
 361     }
 362 
 363     // handle 4-byte runes
 364     if (check_4_byte_rune(a, b, c, d)) {
 365         putc(a, w);
 366         putc(b, w);
 367         putc(c, w);
 368         putc(d, w);
 369         return;
 370     }
 371 
 372     handle_invalid_rune(m);
 373 }
 374 
 375 // debug is available to diagnose any bug found
 376 void debug(j0_maker* s, const char* fmt, ...) {
 377     va_list args;
 378     va_start(args, fmt);
 379 
 380     if (s->in != stdin) {
 381         fclose(s->in);
 382     }
 383 
 384     write_byte(s, '\n');
 385 
 386     const unsigned long line = s->line;
 387     const unsigned long pos = s->pos;
 388     fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
 389     fprintf(stderr, fmt, args);
 390     fprintf(stderr, "\x1b[0m\n");
 391 
 392     va_end(args);
 393 
 394     exit(10);
 395 }
 396 
 397 // fail quits this app right after showing the error message given
 398 void fail(j0_maker* s, int code, const char* msg) {
 399     const unsigned long line = s->line;
 400     const unsigned long pos = s->pos;
 401 
 402     write_byte(s, '\n');
 403     fprintf(stderr, "\x1b[31mline %lu, pos %lu: %s\x1b[0m\n", line, pos, msg);
 404     exit(code);
 405 }
 406 
 407 bool demand_keyword(j0_maker* s, char* rest) {
 408     for (; rest[0] != 0; rest++) {
 409         const int lead = s->current;
 410         if (lead == EOF || lead != rest[0]) {
 411             return false;
 412         }
 413         advance(s);
 414     }
 415 
 416     return rest[0] == 0;
 417 }
 418 
 419 void handle_null(j0_maker* s) {
 420     if (!demand_keyword(s, "null")) {
 421         fail(s, 1, "expected `null` keyword");
 422     }
 423     write_bytes(s, (unsigned char*)"null", 4);
 424 }
 425 
 426 void handle_true(j0_maker* s) {
 427     if (!demand_keyword(s, "true")) {
 428         fail(s, 1, "expected `true` keyword");
 429     }
 430     write_bytes(s, (unsigned char*)"true", 4);
 431 }
 432 
 433 void handle_false(j0_maker* s) {
 434     if (!demand_keyword(s, "false")) {
 435         fail(s, 1, "expected `false` keyword");
 436     }
 437     write_bytes(s, (unsigned char*)"false", 5);
 438 }
 439 
 440 void handle_capital_none(j0_maker* s) {
 441     if (!demand_keyword(s, "None")) {
 442         fail(s, 1, "expected `None` keyword");
 443     }
 444     write_bytes(s, (unsigned char*)"null", 4);
 445 }
 446 
 447 void handle_capital_true(j0_maker* s) {
 448     if (!demand_keyword(s, "True")) {
 449         fail(s, 1, "expected `True` keyword");
 450     }
 451     write_bytes(s, (unsigned char*)"true", 4);
 452 }
 453 
 454 void handle_capital_false(j0_maker* s) {
 455     if (!demand_keyword(s, "False")) {
 456         fail(s, 1, "expected `False` keyword");
 457     }
 458     write_bytes(s, (unsigned char*)"false", 5);
 459 }
 460 
 461 void handle_digits(j0_maker* s) {
 462     if (!isdigit(s->current)) {
 463         fail(s, 1, "expected/missing digits");
 464     }
 465 
 466     while (isdigit(s->current)) {
 467         write_byte(s, s->current);
 468         advance(s);
 469     }
 470 }
 471 
 472 void handle_number(j0_maker* s) {
 473     handle_digits(s);
 474 
 475     const int lead = s->current;
 476 
 477     if (lead == '.') {
 478         write_byte(s, '.');
 479         advance(s);
 480 
 481         if (isdigit(s->current)) {
 482             handle_digits(s);
 483         } else {
 484             write_byte(s, '0');
 485         }
 486         return;
 487     }
 488 
 489     if (lead == 'e' || lead == 'E') {
 490         write_byte(s, lead);
 491         advance(s);
 492 
 493         if (s->current == '+') {
 494             advance(s);
 495         } else if (s->current == '-') {
 496             write_byte(s, '-');
 497             advance(s);
 498         }
 499 
 500         handle_digits(s);
 501     }
 502 }
 503 
 504 void handle_dot(j0_maker* s) {
 505     write_byte(s, '0');
 506     write_byte(s, '.');
 507     advance(s);
 508 
 509     if (!isdigit(s->current)) {
 510         fail(s, 1, "expected/missing digits after decimal dot");
 511     }
 512     handle_digits(s);
 513 }
 514 
 515 void handle_plus_number(j0_maker* s) {
 516     advance(s);
 517 
 518     if (s->current == '.') {
 519         handle_dot(s);
 520         return;
 521     }
 522     handle_number(s);
 523 }
 524 
 525 void handle_minus_number(j0_maker* s) {
 526     write_byte(s, '-');
 527     advance(s);
 528 
 529     if (s->current == '.') {
 530         handle_dot(s);
 531         return;
 532     }
 533     handle_number(s);
 534 }
 535 
 536 void handle_string_escape(j0_maker* s, int c) {
 537     switch (c) {
 538         case '"':
 539         case '\\':
 540         case 'b':
 541         case 'f':
 542         case 'n':
 543         case 'r':
 544         case 't':
 545             write_byte(s, '\\');
 546             write_byte(s, c);
 547             break;
 548 
 549         case 'u':
 550             write_byte(s, '\\');
 551             write_byte(s, 'u');
 552             for (size_t i = 0; i < 4; i++) {
 553                 advance(s);
 554                 const int lead = s->current;
 555                 if (lead == EOF) {
 556                     fail(s, 1, "end of input before end of string");
 557                 }
 558                 if (isdigit(lead) || isalpha(lead)) {
 559                     // write_byte(s, toupper(c));
 560                     write_byte(s, c);
 561                     continue;
 562                 }
 563                 fail(s, 1, "invalid hexadecimal digit in string");
 564             }
 565             break;
 566 
 567         case 'x':
 568             write_byte(s, '\\');
 569             write_byte(s, 'u');
 570             write_byte(s, '0');
 571             write_byte(s, '0');
 572             for (size_t i = 0; i < 2; i++) {
 573                 advance(s);
 574                 const int lead = s->current;
 575                 if (lead == EOF) {
 576                     fail(s, 1, "end of input before end of string");
 577                 }
 578                 if (isdigit(lead) || isalpha(lead)) {
 579                     // write_byte(s, toupper(c));
 580                     write_byte(s, c);
 581                     continue;
 582                 }
 583                 fail(s, 1, "invalid hexadecimal digit in string");
 584             }
 585             break;
 586 
 587         case '\'':
 588             write_byte(s, '\'');
 589             break;
 590 
 591         default:
 592             write_byte(s, s->current);
 593             break;
 594     }
 595 }
 596 
// hex maps values from 0 to 15 to their (uppercase) hexadecimal digits: it's
// only used by function handle_low_char to render hexadecimals
const char* hex = "0123456789ABCDEF";
 599 
 600 // handle_low_char simplifies function handle_string
 601 void handle_low_char(j0_maker* s, int c) {
 602     switch (c) {
 603         case '\t':
 604             write_byte(s, '\\');
 605             write_byte(s, 't');
 606             break;
 607 
 608         case '\n':
 609             write_byte(s, '\\');
 610             write_byte(s, 'n');
 611             break;
 612 
 613         case '\r':
 614             write_byte(s, '\\');
 615             write_byte(s, 'r');
 616             break;
 617 
 618         case '\v':
 619             write_byte(s, '\\');
 620             write_byte(s, 'v');
 621             break;
 622 
 623         default:
 624             write_byte(s, '\\');
 625             write_byte(s, 'u');
 626             write_byte(s, '0');
 627             write_byte(s, '0');
 628             write_byte(s, hex[c / 16]);
 629             write_byte(s, hex[c % 16]);
 630             break;
 631     }
 632 }
 633 
 634 void handle_string(j0_maker* s) {
 635     const unsigned char quote = s->current;
 636     bool escaped = false;
 637 
 638     write_byte(s, '"');
 639 
 640     while (true) {
 641         advance(s);
 642 
 643         int c = s->current;
 644         if (c == EOF) {
 645             fail(s, 1, "input ended before string was close-quoted");
 646         }
 647 
 648         if (escaped) {
 649             handle_string_escape(s, c);
 650             escaped = false;
 651             continue;
 652         }
 653 
 654         switch (c) {
 655             case '\\':
 656                 escaped = true;
 657                 break;
 658 
 659             default:
 660                 if (c == quote) {
 661                     write_byte(s, '"');
 662                     advance(s);
 663                     return;
 664                 }
 665 
 666                 // write_byte(s, c);
 667                 if (c < ' ') {
 668                     handle_low_char(s, c);
 669                 } else {
 670                     copy_utf8_rune(s);
 671                 }
 672                 break;
 673         }
 674     }
 675 }
 676 
 677 void handle_token(j0_maker* s);
 678 
 679 void handle_array(j0_maker* s) {
 680     size_t items_before = 0;
 681     write_byte(s, '[');
 682     advance(s);
 683 
 684     while (true) {
 685         seek_token(s);
 686         const int lead = s->current;
 687 
 688         if (lead == EOF) {
 689             fail(s, 1, "unclosed array");
 690         }
 691 
 692         if (lead == ',') {
 693             advance(s);
 694             continue;
 695         }
 696 
 697         if (lead == ']') {
 698             write_byte(s, ']');
 699             advance(s);
 700             return;
 701         }
 702 
 703         if (items_before > 0) {
 704             write_byte(s, ',');
 705         }
 706         handle_token(s);
 707         items_before++;
 708     }
 709 }
 710 
 711 // handle_array_jsonl is a slight variation of func handle_array: this one is
 712 // used to handle top-level arrays when running in JSON Lines mode, to emit
 713 // line-feeds after each item, instead of commas between them
 714 void handle_array_jsonl(j0_maker* s) {
 715     size_t items_before = 0;
 716     advance(s);
 717 
 718     while (true) {
 719         seek_token(s);
 720         const int lead = s->current;
 721 
 722         if (lead == EOF) {
 723             fail(s, 1, "unclosed array");
 724         }
 725 
 726         if (lead == ',') {
 727             advance(s);
 728             continue;
 729         }
 730 
 731         if (items_before > 0) {
 732             write_byte(s, '\n');
 733             fflush(s->out);
 734         }
 735 
 736         if (lead == ']') {
 737             advance(s);
 738             return;
 739         }
 740 
 741         handle_token(s);
 742         items_before++;
 743     }
 744 }
 745 
 746 void handle_unquoted_key(j0_maker* s) {
 747     write_byte(s, '"');
 748 
 749     while (true) {
 750         int c = s->current;
 751         if (c == EOF) {
 752             fail(s, 1, "input ended with an object key");
 753         }
 754 
 755         write_byte(s, c);
 756         advance(s);
 757 
 758         c = s->current;
 759         if (!isalpha(c) && !isdigit(c) && c != '_') {
 760             break;
 761         }
 762     }
 763 
 764     write_byte(s, '"');
 765 }
 766 
 767 void handle_object(j0_maker* s) {
 768     size_t items_before = 0;
 769     write_byte(s, '{');
 770     advance(s);
 771 
 772     while (true) {
 773         seek_token(s);
 774         int lead = s->current;
 775 
 776         if (lead == EOF) {
 777             fail(s, 1, "unclosed object");
 778         }
 779 
 780         if (lead == ',') {
 781             advance(s);
 782             continue;
 783         }
 784 
 785         if (lead == '}') {
 786             write_byte(s, '}');
 787             advance(s);
 788             return;
 789         }
 790 
 791         if (lead == '"' || lead == '\'') {
 792             if (items_before > 0) {
 793                 write_byte(s, ',');
 794             }
 795             handle_string(s);
 796             items_before++;
 797         } else if (isalpha(lead) || lead == '_') {
 798             if (items_before > 0) {
 799                 write_byte(s, ',');
 800             }
 801             handle_unquoted_key(s);
 802             items_before++;
 803         } else {
 804             fail(s, 1, "only strings or identifiers can be object keys");
 805         }
 806 
 807         seek_token(s);
 808         lead = s->current;
 809 
 810         if (lead == EOF) {
 811             fail(s, 1, "input ended after object-key and before value");
 812         }
 813 
 814         if (lead != ':') {
 815             fail(s, 1, "a `:` must follow all object keys");
 816         }
 817 
 818         write_byte(s, ':');
 819         advance(s);
 820 
 821         seek_token(s);
 822         if (s->current == EOF) {
 823             fail(s, 1, "input ended after a `:` following an object-key");
 824         }
 825 
 826         handle_token(s);
 827     }
 828 }
 829 
 830 // dispatch ties leading bytes/chars in tokens to the funcs which handle them
 831 void (*dispatch[256])() = {
 832     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 833     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 834     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 835     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 836     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 837     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 838     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 839     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 840     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 841     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 842     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 843     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 844     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 845     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 846     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 847     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 848     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 849     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 850     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 851     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 852     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 853     NULL, NULL, NULL, NULL,
 854 };
 855 
 856 void handle_token(j0_maker* s) {
 857     dispatch[s->current](s);
 858 }
 859 
 860 // handle_invalid_token shows an error message and quits the app right after
 861 void handle_invalid_token(j0_maker* s) {
 862     char msg[64];
 863     unsigned char c = (unsigned char)s->current;
 864     sprintf(msg, "%c (%d): invalid token", c, c);
 865     fail(s, 1, msg);
 866 }
 867 
 868 void handle_array_jsonl(j0_maker* s);
 869 
 870 void handle_input(FILE* src, bool jsonl) {
 871     unsigned char ibuf[32 * 1024];
 872 
 873     j0_maker state;
 874     j0_maker* s = &state;
 875     s->ibuf = ibuf;
 876     s->icap = sizeof(ibuf);
 877     restart_state(s, stdout, src);
 878 
 879     // ignore leading whitespace/comment bytes, if present
 880     seek_token(s);
 881 
 882     if (s->current == EOF) {
 883         fail(s, 1, "empty input isn't valid JSON");
 884     }
 885 
 886     if (jsonl && s->current == '[') {
 887         handle_array_jsonl(s);
 888     } else {
 889         handle_token(s);
 890         write_byte(s, '\n');
 891         fflush(s->out);
 892     }
 893 
 894     // ignore trailing whitespace/comment bytes, if present
 895     seek_token(s);
 896 
 897     // ignore trailing semicolon, if present
 898     if (s->current == ';') {
 899         advance(s);
 900         // ignore trailing whitespace/comment bytes, if present
 901         seek_token(s);
 902     }
 903 
 904     if (!feof(src) || s->current != EOF) {
 905         fail(s, 1, "unexpected trailing JSON data");
 906     }
 907 }
 908 
 909 bool is_help_option(const char* s) {
 910     return (s[0] == '-' && s[1] != 0) && (
 911         strcmp(s, "-h") == 0 ||
 912         strcmp(s, "--h") == 0 ||
 913         strcmp(s, "-help") == 0 ||
 914         strcmp(s, "--help") == 0
 915     );
 916 }
 917 
 918 bool is_jsonl_option(const char* s) {
 919     return (s[0] == '-' && s[1] != 0) && (
 920         strcmp(s, "-jl") == 0 ||
 921         strcmp(s, "--jl") == 0 ||
 922         strcmp(s, "-jsonl") == 0 ||
 923         strcmp(s, "--jsonl") == 0
 924     );
 925 }
 926 
 927 // run returns the error code
 928 int run(int argc, char** argv) {
 929     bool jsonl = false;
 930     if (argc > 1 && is_jsonl_option(argv[1])) {
 931         jsonl = true;
 932         argc--;
 933         argv++;
 934     }
 935 
 936     if (argc > 2) {
 937         const char* msg = "can't use more than 1 named input";
 938         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 939         return 1;
 940     }
 941 
 942     // use stdin when not given a filepath, or is `-`
 943     if (argc < 2 || argv[1][0] == 0 || strcmp(argv[1], "-") == 0) {
 944         handle_input(stdin, jsonl);
 945         return 0;
 946     }
 947 
 948     const char* path = argv[1];
 949     FILE* f = fopen(path, "rb");
 950     if (f == NULL) {
 951         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 952         return 1;
 953     }
 954 
 955     handle_input(f, jsonl);
 956     fclose(f);
 957 
 958     return 0;
 959 }
 960 
 961 int main(int argc, char** argv) {
 962 #ifdef _WIN32
 963     setmode(fileno(stdin), O_BINARY);
 964     // ensure output lines end in LF instead of CRLF on windows
 965     setmode(fileno(stdout), O_BINARY);
 966     setmode(fileno(stderr), O_BINARY);
 967 #endif
 968 
 969     if (argc > 1 && is_help_option(argv[1])) {
 970         puts(info);
 971         return 0;
 972     }
 973 
 974     // the dispatch table starts as all null function-pointers
 975     for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
 976         dispatch[i] = handle_invalid_token;
 977     }
 978 
 979     for (size_t i = '0'; i <= '9'; i++) {
 980         dispatch[i] = handle_number;
 981     }
 982 
 983     dispatch['n'] = handle_null;
 984     dispatch['t'] = handle_true;
 985     dispatch['f'] = handle_false;
 986     dispatch['N'] = handle_capital_none;
 987     dispatch['T'] = handle_capital_true;
 988     dispatch['F'] = handle_capital_false;
 989     dispatch['.'] = handle_dot;
 990     dispatch['+'] = handle_plus_number;
 991     dispatch['-'] = handle_minus_number;
 992     dispatch['"'] = handle_string;
 993     dispatch['\''] = handle_string;
 994     dispatch['['] = handle_array;
 995     dispatch['{'] = handle_object;
 996 
 997     // enable full/block-buffering for standard output
 998     setvbuf(stdout, NULL, _IOFBF, 0);
 999 
1000     return run(argc, argv) == 0 ? 0 : 1;
1001 }