File: j0.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./j0 ./j0.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdarg.h>
  33 #include <stdbool.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <fcntl.h>
  40 #include <windows.h>
  41 #endif
  42 
  43 #ifdef RED_ERRORS
  44 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  45 #ifdef __APPLE__
  46 #define ERROR_STYLE "\x1b[31m"
  47 #endif
  48 #define RESET_STYLE "\x1b[0m"
  49 #else
  50 #define ERROR_STYLE
  51 #define RESET_STYLE
  52 #endif
  53 
  54 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  55 
// info is the help message, which func main prints to standard output when
// the first command-line argument is one of the help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
"    - ignores both rest-of-line and multi-line comments\n"
"    - ignores extra/trailing commas in arrays and objects\n"
"    - turns single-quoted strings/keys into double-quoted strings\n"
"    - double-quotes unquoted object keys\n"
"    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
"    -h        show this help message\n"
"    -help     show this help message\n"
"    -jsonl    emit JSON Lines, when top-level value is an array\n"
"";
  78 
// j0_maker has all the state needed to turn an input stream into JSON: it
// has buffers for both input and output, the current position in the input
// for error messages, and a 2-byte window into the input
typedef struct j0_maker {
    FILE* in;  // where input bytes are read from
    FILE* out; // where output bytes are written to

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    unsigned char* obuf;
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    int current; // the byte being handled, or EOF when input is over
    int next;    // the byte right after the current one, or EOF
} j0_maker;
  98 
  99 // advance_reader_pos helps func read_byte do its job
 100 static inline void advance_reader_pos(j0_maker* r, unsigned char b) {
 101     r->ipos++;
 102     if (b == '\n') {
 103         r->line++;
 104         r->pos = 1;
 105     } else {
 106         r->pos++;
 107     }
 108 }
 109 
 110 // read_byte does as it says: check its return for the value EOF, before
 111 // using it as the next byte
 112 static inline int read_byte(j0_maker* r) {
 113     if (r->ipos < r->ilen) {
 114         // inside current chunk
 115         const unsigned char b = r->ibuf[r->ipos];
 116         advance_reader_pos(r, b);
 117         return b;
 118     }
 119 
 120     // need to read the next block
 121     r->ipos = 0;
 122     r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
 123     if (r->ilen > 0) {
 124         const unsigned char b = r->ibuf[r->ipos];
 125         advance_reader_pos(r, b);
 126         return b;
 127     }
 128 
 129     // reached the end of data
 130     return EOF;
 131 }
 132 
 133 // advance is used in most of the code, instead of calling read_byte directly
 134 static inline void advance(j0_maker* r) {
 135     r->current = r->next;
 136     r->next = read_byte(r);
 137 }
 138 
 139 void fail(j0_maker* m, int code, const char* msg);
 140 
 141 void skip_line(j0_maker* r) {
 142     while (true) {
 143         advance(r);
 144         const int lead = r->current;
 145 
 146         if (lead == EOF) {
 147             break;
 148         }
 149 
 150         if (lead == '\n') {
 151             advance(r);
 152             break;
 153         }
 154     }
 155 }
 156 
 157 void skip_multiline_comment(j0_maker* r) {
 158     unsigned char prev = 0;
 159 
 160     while (true) {
 161         advance(r);
 162         const int lead = r->current;
 163 
 164         if (lead == EOF) {
 165             break;
 166         }
 167 
 168         if (prev == '*' && lead == '/') {
 169             advance(r);
 170             break;
 171         }
 172 
 173         prev = (unsigned char)lead;
 174     }
 175 }
 176 
 177 void skip_comment(j0_maker* r) {
 178     int lead = r->current;
 179 
 180     if (lead == '#') {
 181         skip_line(r);
 182         return;
 183     }
 184 
 185     if (lead != '/') {
 186         fail(r, 1, "expected a slash to start comments");
 187     }
 188 
 189     advance(r);
 190     lead = r->current;
 191 
 192     if (lead == '/') {
 193         skip_line(r);
 194         return;
 195     }
 196 
 197     if (lead == '*') {
 198         skip_multiline_comment(r);
 199         return;
 200     }
 201 
 202     fail(r, 1, "expected `//` or `/*` to start comments");
 203 }
 204 
 205 void seek_token(j0_maker* r) {
 206     while (true) {
 207         const int lead = r->current;
 208 
 209         if (lead != EOF && lead <= ' ') {
 210             advance(r);
 211             continue;
 212         }
 213 
 214         if (lead == '/' || lead == '#') {
 215             skip_comment(r);
 216             continue;
 217         }
 218 
 219         break;
 220     }
 221 }
 222 
 223 bool starts_with_bom(const unsigned char* b, const size_t n) {
 224     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
 225 }
 226 
 227 void restart_state(j0_maker* m, FILE* w, FILE* r) {
 228     m->in = r;
 229     m->ilen = 0;
 230     m->ipos = 0;
 231 
 232     m->out = w;
 233     m->opos = 0;
 234 
 235     m->line = 1;
 236     m->pos = 1;
 237 
 238     m->current = EOF;
 239     m->next = EOF;
 240 
 241     m->current = read_byte(m);
 242     if (m->current == EOF) {
 243         return;
 244     }
 245     m->next = read_byte(m);
 246 
 247     // skip leading UTF-8 BOM (byte-order mark), if present
 248     if (starts_with_bom(m->ibuf, m->ilen)) {
 249         // a UTF-8 BOM has 3 bytes
 250         for (size_t i = 0; i < 3 && m->current != EOF; i++) {
 251             advance(m);
 252         }
 253     }
 254 }
 255 
 256 void write_byte(j0_maker* m, unsigned char b) {
 257     if (m->opos < m->ocap) {
 258         m->obuf[m->opos++] = b;
 259         return;
 260     }
 261 
 262     fwrite(m->obuf, m->ocap, 1, m->out);
 263     m->obuf[0] = b;
 264     m->opos = 1;
 265 }
 266 
 267 // write_bytes does as it says, minimizing the number of calls to fwrite
 268 void write_bytes(j0_maker* m, const unsigned char* src, size_t len) {
 269     for (size_t i = 0; i < len; i++) {
 270         write_byte(m, src[i]);
 271     }
 272 }
 273 
 274 void flush(j0_maker* m) {
 275     if (m->opos > 0) {
 276         fwrite(m->obuf, m->opos, 1, m->out);
 277     }
 278     m->opos = 0;
 279     fflush(m->out);
 280 }
 281 
 282 // https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
 283 
 284 bool check_2_byte_rune(int a, int b) {
 285     return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
 286 }
 287 
 288 bool check_3_byte_rune(int a, int b, int c) {
 289     return (
 290         (a == 0xe0) &&
 291         (0xa0 <= b && b <= 0xbf) &&
 292         (0x80 <= c && c <= 0xbf)
 293     ) || (
 294         (0xe1 <= a && a <= 0xec) &&
 295         (0x80 <= b && b <= 0xbf) &&
 296         (0x80 <= c && c <= 0xbf)
 297     ) || (
 298         (a == 0xed) &&
 299         (0x80 <= b && b <= 0x9f) &&
 300         (0x80 <= c && c <= 0xbf)
 301     ) || (
 302         (a == 0xee || a == 0xef) &&
 303         (0x80 <= b && b <= 0xbf) &&
 304         (0x80 <= c && c <= 0xbf)
 305     );
 306 }
 307 
 308 bool check_4_byte_rune(int a, int b, int c, int d) {
 309     return (
 310         (a == 0xf0) &&
 311         (0x90 <= b && b <= 0xbf) &&
 312         (0x80 <= c && c <= 0xbf) &&
 313         (0x80 <= d && d <= 0xbf)
 314     ) || (
 315         (a == 0xf1 || a == 0xf3) &&
 316         (0x80 <= b && b <= 0xbf) &&
 317         (0x80 <= c && c <= 0xbf) &&
 318         (0x80 <= d && d <= 0xbf)
 319     ) || (
 320         (a == 0xf4) &&
 321         (0x80 <= b && b <= 0xbf) &&
 322         (0x80 <= c && c <= 0x8f) &&
 323         (0x80 <= d && d <= 0xbf)
 324     );
 325 }
 326 
 327 // write_replacement_char is the recommended action to handle invalid bytes
 328 void write_replacement_char(j0_maker* m) {
 329     write_byte(m, 0xef);
 330     write_byte(m, 0xbf);
 331     write_byte(m, 0xbd);
 332 }
 333 
 334 void handle_invalid_rune(j0_maker* m) {
 335     // fail(m, 1, "invalid unicode value");
 336     write_replacement_char(m);
 337 }
 338 
 339 void copy_utf8_rune(j0_maker* m) {
 340     const int a = m->current;
 341 
 342     if (a == EOF) {
 343         return;
 344     }
 345 
 346     // handle 1-byte runes
 347     if (a < 128) {
 348         write_byte(m, a);
 349         return;
 350     }
 351 
 352     advance(m);
 353     const int b = m->current;
 354 
 355     if (b == EOF) {
 356         handle_invalid_rune(m);
 357         return;
 358     }
 359 
 360     // handle 2-byte runes
 361     if (check_2_byte_rune(a, b)) {
 362         write_byte(m, a);
 363         write_byte(m, b);
 364         return;
 365     }
 366 
 367     advance(m);
 368     const int c = m->current;
 369 
 370     if (c == EOF) {
 371         handle_invalid_rune(m);
 372         return;
 373     }
 374 
 375     // handle 3-byte runes
 376     if (check_3_byte_rune(a, b, c)) {
 377         write_byte(m, a);
 378         write_byte(m, b);
 379         write_byte(m, c);
 380         return;
 381     }
 382 
 383     advance(m);
 384     const int d = m->current;
 385 
 386     if (d == EOF) {
 387         handle_invalid_rune(m);
 388         return;
 389     }
 390 
 391     // handle 4-byte runes
 392     if (check_4_byte_rune(a, b, c, d)) {
 393         write_byte(m, a);
 394         write_byte(m, b);
 395         write_byte(m, c);
 396         write_byte(m, d);
 397         return;
 398     }
 399 
 400     handle_invalid_rune(m);
 401 }
 402 
 403 // debug is available to diagnose any bug found
 404 void debug(j0_maker* m, const char* fmt, ...) {
 405     va_list args;
 406     va_start(args, fmt);
 407 
 408     if (m->in != stdin) {
 409         fclose(m->in);
 410     }
 411 
 412     write_byte(m, '\n');
 413 
 414     const unsigned long line = m->line;
 415     const unsigned long pos = m->pos;
 416     fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
 417     fprintf(stderr, fmt, args);
 418     fprintf(stderr, "\x1b[0m\n");
 419 
 420     va_end(args);
 421 
 422     exit(10);
 423 }
 424 
 425 // fail quits this app right after showing the error message given
 426 void fail(j0_maker* m, int code, const char* msg) {
 427     const unsigned long line = m->line;
 428     const unsigned long pos = m->pos;
 429 
 430     write_byte(m, '\n');
 431     flush(m);
 432     fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
 433     exit(code);
 434 }
 435 
 436 bool demand_keyword(j0_maker* m, char* rest) {
 437     for (; rest[0] != 0; rest++) {
 438         const int lead = m->current;
 439         if (lead == EOF || lead != rest[0]) {
 440             return false;
 441         }
 442         advance(m);
 443     }
 444 
 445     return rest[0] == 0;
 446 }
 447 
 448 void handle_null(j0_maker* m) {
 449     if (!demand_keyword(m, "null")) {
 450         fail(m, 1, "expected `null` keyword");
 451     }
 452     write_bytes(m, (unsigned char*)"null", 4);
 453 }
 454 
 455 void handle_true(j0_maker* m) {
 456     if (!demand_keyword(m, "true")) {
 457         fail(m, 1, "expected `true` keyword");
 458     }
 459     write_bytes(m, (unsigned char*)"true", 4);
 460 }
 461 
 462 void handle_false(j0_maker* m) {
 463     if (!demand_keyword(m, "false")) {
 464         fail(m, 1, "expected `false` keyword");
 465     }
 466     write_bytes(m, (unsigned char*)"false", 5);
 467 }
 468 
 469 void handle_capital_none(j0_maker* m) {
 470     if (!demand_keyword(m, "None")) {
 471         fail(m, 1, "expected `None` keyword");
 472     }
 473     write_bytes(m, (unsigned char*)"null", 4);
 474 }
 475 
 476 void handle_capital_true(j0_maker* m) {
 477     if (!demand_keyword(m, "True")) {
 478         fail(m, 1, "expected `True` keyword");
 479     }
 480     write_bytes(m, (unsigned char*)"true", 4);
 481 }
 482 
 483 void handle_capital_false(j0_maker* m) {
 484     if (!demand_keyword(m, "False")) {
 485         fail(m, 1, "expected `False` keyword");
 486     }
 487     write_bytes(m, (unsigned char*)"false", 5);
 488 }
 489 
 490 void handle_digits(j0_maker* m) {
 491     if (!isdigit(m->current)) {
 492         fail(m, 1, "expected/missing digits");
 493     }
 494 
 495     while (isdigit(m->current)) {
 496         write_byte(m, m->current);
 497         advance(m);
 498     }
 499 }
 500 
 501 void handle_number(j0_maker* m) {
 502     handle_digits(m);
 503 
 504     const int lead = m->current;
 505 
 506     if (lead == '.') {
 507         write_byte(m, '.');
 508         advance(m);
 509 
 510         if (isdigit(m->current)) {
 511             handle_digits(m);
 512         } else {
 513             write_byte(m, '0');
 514         }
 515         return;
 516     }
 517 
 518     if (lead == 'e' || lead == 'E') {
 519         write_byte(m, lead);
 520         advance(m);
 521 
 522         if (m->current == '+') {
 523             advance(m);
 524         } else if (m->current == '-') {
 525             write_byte(m, '-');
 526             advance(m);
 527         }
 528 
 529         handle_digits(m);
 530     }
 531 }
 532 
 533 void handle_dot(j0_maker* m) {
 534     write_byte(m, '0');
 535     write_byte(m, '.');
 536     advance(m);
 537 
 538     if (!isdigit(m->current)) {
 539         fail(m, 1, "expected/missing digits after decimal dot");
 540     }
 541     handle_digits(m);
 542 }
 543 
 544 void handle_plus_number(j0_maker* m) {
 545     advance(m);
 546 
 547     if (m->current == '.') {
 548         handle_dot(m);
 549         return;
 550     }
 551     handle_number(m);
 552 }
 553 
 554 void handle_minus_number(j0_maker* m) {
 555     write_byte(m, '-');
 556     advance(m);
 557 
 558     if (m->current == '.') {
 559         handle_dot(m);
 560         return;
 561     }
 562     handle_number(m);
 563 }
 564 
 565 void handle_string_escape(j0_maker* m, int c) {
 566     switch (c) {
 567         case '"':
 568         case '\\':
 569         case 'b':
 570         case 'f':
 571         case 'n':
 572         case 'r':
 573         case 't':
 574             write_byte(m, '\\');
 575             write_byte(m, c);
 576             break;
 577 
 578         case 'u':
 579             write_byte(m, '\\');
 580             write_byte(m, 'u');
 581             for (size_t i = 0; i < 4; i++) {
 582                 advance(m);
 583                 const int lead = m->current;
 584                 if (lead == EOF) {
 585                     fail(m, 1, "end of input before end of string");
 586                 }
 587                 if (isdigit(lead) || isalpha(lead)) {
 588                     // write_byte(m, toupper(c));
 589                     write_byte(m, c);
 590                     continue;
 591                 }
 592                 fail(m, 1, "invalid hexadecimal digit in string");
 593             }
 594             break;
 595 
 596         case 'x':
 597             write_byte(m, '\\');
 598             write_byte(m, 'u');
 599             write_byte(m, '0');
 600             write_byte(m, '0');
 601             for (size_t i = 0; i < 2; i++) {
 602                 advance(m);
 603                 const int lead = m->current;
 604                 if (lead == EOF) {
 605                     fail(m, 1, "end of input before end of string");
 606                 }
 607                 if (isdigit(lead) || isalpha(lead)) {
 608                     // write_byte(m, toupper(c));
 609                     write_byte(m, c);
 610                     continue;
 611                 }
 612                 fail(m, 1, "invalid hexadecimal digit in string");
 613             }
 614             break;
 615 
 616         case '\'':
 617             write_byte(m, '\'');
 618             break;
 619 
 620         default:
 621             write_byte(m, m->current);
 622             break;
 623     }
 624 }
 625 
// hex has the uppercase hexadecimal digits, so that indexing it with a value
// from 0 to 15 gives the matching digit: it's only used by function
// handle_low_char to render hexadecimals
const char* hex = "0123456789ABCDEF";
 628 
 629 // handle_low_char simplifies function handle_string
 630 void handle_low_char(j0_maker* m, int c) {
 631     switch (c) {
 632         case '\t':
 633             write_byte(m, '\\');
 634             write_byte(m, 't');
 635             break;
 636 
 637         case '\n':
 638             write_byte(m, '\\');
 639             write_byte(m, 'n');
 640             break;
 641 
 642         case '\r':
 643             write_byte(m, '\\');
 644             write_byte(m, 'r');
 645             break;
 646 
 647         case '\v':
 648             write_byte(m, '\\');
 649             write_byte(m, 'v');
 650             break;
 651 
 652         default:
 653             write_byte(m, '\\');
 654             write_byte(m, 'u');
 655             write_byte(m, '0');
 656             write_byte(m, '0');
 657             write_byte(m, hex[c / 16]);
 658             write_byte(m, hex[c % 16]);
 659             break;
 660     }
 661 }
 662 
 663 void handle_string(j0_maker* m) {
 664     const unsigned char quote = m->current;
 665     bool escaped = false;
 666 
 667     write_byte(m, '"');
 668 
 669     while (true) {
 670         advance(m);
 671 
 672         int c = m->current;
 673         if (c == EOF) {
 674             fail(m, 1, "input ended before string was close-quoted");
 675         }
 676 
 677         if (escaped) {
 678             handle_string_escape(m, c);
 679             escaped = false;
 680             continue;
 681         }
 682 
 683         switch (c) {
 684             case '\\':
 685                 escaped = true;
 686                 break;
 687 
 688             default:
 689                 if (c == quote) {
 690                     write_byte(m, '"');
 691                     advance(m);
 692                     return;
 693                 }
 694 
 695                 // write_byte(m, c);
 696                 if (c < ' ') {
 697                     handle_low_char(m, c);
 698                 } else {
 699                     copy_utf8_rune(m);
 700                 }
 701                 break;
 702         }
 703     }
 704 }
 705 
 706 void handle_token(j0_maker* m);
 707 
 708 void handle_array(j0_maker* m) {
 709     size_t items_before = 0;
 710     write_byte(m, '[');
 711     advance(m);
 712 
 713     while (true) {
 714         seek_token(m);
 715         const int lead = m->current;
 716 
 717         if (lead == EOF) {
 718             fail(m, 1, "unclosed array");
 719         }
 720 
 721         if (lead == ',') {
 722             advance(m);
 723             continue;
 724         }
 725 
 726         if (lead == ']') {
 727             write_byte(m, ']');
 728             advance(m);
 729             return;
 730         }
 731 
 732         if (items_before > 0) {
 733             write_byte(m, ',');
 734         }
 735         handle_token(m);
 736         items_before++;
 737     }
 738 }
 739 
 740 // handle_array_jsonl is a slight variation of func handle_array: this one is
 741 // used to handle top-level arrays when running in JSON Lines mode, to emit
 742 // line-feeds after each item, instead of commas between them
 743 void handle_array_jsonl(j0_maker* m) {
 744     size_t items_before = 0;
 745     advance(m);
 746 
 747     while (true) {
 748         seek_token(m);
 749         const int lead = m->current;
 750 
 751         if (lead == EOF) {
 752             fail(m, 1, "unclosed array");
 753         }
 754 
 755         if (lead == ',') {
 756             advance(m);
 757             continue;
 758         }
 759 
 760         if (items_before > 0) {
 761             write_byte(m, '\n');
 762         }
 763 
 764         if (lead == ']') {
 765             advance(m);
 766             return;
 767         }
 768 
 769         handle_token(m);
 770         items_before++;
 771     }
 772 }
 773 
 774 void handle_unquoted_key(j0_maker* m) {
 775     write_byte(m, '"');
 776 
 777     while (true) {
 778         int c = m->current;
 779         if (c == EOF) {
 780             fail(m, 1, "input ended with an object key");
 781         }
 782 
 783         write_byte(m, c);
 784         advance(m);
 785 
 786         c = m->current;
 787         if (!isalpha(c) && !isdigit(c) && c != '_') {
 788             break;
 789         }
 790     }
 791 
 792     write_byte(m, '"');
 793 }
 794 
 795 void handle_object(j0_maker* m) {
 796     size_t items_before = 0;
 797     write_byte(m, '{');
 798     advance(m);
 799 
 800     while (true) {
 801         seek_token(m);
 802         int lead = m->current;
 803 
 804         if (lead == EOF) {
 805             fail(m, 1, "unclosed object");
 806         }
 807 
 808         if (lead == ',') {
 809             advance(m);
 810             continue;
 811         }
 812 
 813         if (lead == '}') {
 814             write_byte(m, '}');
 815             advance(m);
 816             return;
 817         }
 818 
 819         if (lead == '"' || lead == '\'') {
 820             if (items_before > 0) {
 821                 write_byte(m, ',');
 822             }
 823             handle_string(m);
 824             items_before++;
 825         } else if (isalpha(lead) || lead == '_') {
 826             if (items_before > 0) {
 827                 write_byte(m, ',');
 828             }
 829             handle_unquoted_key(m);
 830             items_before++;
 831         } else {
 832             fail(m, 1, "only strings or identifiers can be object keys");
 833         }
 834 
 835         seek_token(m);
 836         lead = m->current;
 837 
 838         if (lead == EOF) {
 839             fail(m, 1, "input ended after object-key and before value");
 840         }
 841 
 842         if (lead != ':') {
 843             fail(m, 1, "a `:` must follow all object keys");
 844         }
 845 
 846         write_byte(m, ':');
 847         advance(m);
 848 
 849         seek_token(m);
 850         if (m->current == EOF) {
 851             fail(m, 1, "input ended after a `:` following an object-key");
 852         }
 853 
 854         handle_token(m);
 855     }
 856 }
 857 
 858 // dispatch ties leading bytes/chars in tokens to the funcs which handle them
 859 void (*dispatch[256])() = {
 860     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 861     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 862     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 863     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 864     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 865     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 866     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 867     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 868     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 869     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 870     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 871     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 872     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 873     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 874     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 875     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 876     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 877     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 878     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 879     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 880     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 881     NULL, NULL, NULL, NULL,
 882 };
 883 
 884 void handle_token(j0_maker* m) {
 885     dispatch[m->current](m);
 886 }
 887 
 888 // handle_invalid_token shows an error message and quits the app right after
 889 void handle_invalid_token(j0_maker* m) {
 890     char msg[64];
 891     unsigned char c = (unsigned char)m->current;
 892     sprintf(msg, "%c (%d): invalid token", c, c);
 893     fail(m, 1, msg);
 894 }
 895 
 896 void handle_array_jsonl(j0_maker* m);
 897 
 898 void handle_input(FILE* src, bool jsonl) {
 899     unsigned char ibuf[32 * 1024];
 900     unsigned char obuf[8 * 1024];
 901 
 902     j0_maker m;
 903     m.ibuf = ibuf;
 904     m.icap = sizeof(ibuf);
 905     m.obuf = obuf;
 906     m.ocap = sizeof(obuf);
 907     restart_state(&m, stdout, src);
 908 
 909     // ignore leading whitespace/comment bytes, if present
 910     seek_token(&m);
 911 
 912     if (m.current == EOF) {
 913         fail(&m, 1, "empty input isn't valid JSON");
 914     }
 915 
 916     if (jsonl && m.current == '[') {
 917         handle_array_jsonl(&m);
 918     } else {
 919         handle_token(&m);
 920         write_byte(&m, '\n');
 921     }
 922     flush(&m);
 923 
 924     // ignore trailing whitespace/comment bytes, if present
 925     seek_token(&m);
 926 
 927     // ignore trailing semicolon, if present
 928     if (m.current == ';') {
 929         advance(&m);
 930         // ignore trailing whitespace/comment bytes, if present
 931         seek_token(&m);
 932     }
 933 
 934     if (!feof(src) || m.current != EOF) {
 935         fail(&m, 1, "unexpected trailing JSON data");
 936     }
 937 }
 938 
 939 bool is_help_option(const char* s) {
 940     return (s[0] == '-' && s[1] != 0) && (
 941         strcmp(s, "-h") == 0 ||
 942         strcmp(s, "--h") == 0 ||
 943         strcmp(s, "-help") == 0 ||
 944         strcmp(s, "--help") == 0
 945     );
 946 }
 947 
 948 bool is_jsonl_option(const char* s) {
 949     return (s[0] == '-' && s[1] != 0) && (
 950         strcmp(s, "-jl") == 0 ||
 951         strcmp(s, "--jl") == 0 ||
 952         strcmp(s, "-jsonl") == 0 ||
 953         strcmp(s, "--jsonl") == 0
 954     );
 955 }
 956 
 957 // run returns the error code
 958 int run(int argc, char** argv) {
 959     bool jsonl = false;
 960     if (argc > 1 && is_jsonl_option(argv[1])) {
 961         jsonl = true;
 962         argc--;
 963         argv++;
 964     }
 965 
 966     if (argc > 2) {
 967         const char* msg = "can't use more than 1 named input";
 968         fprintf(stderr, ERROR_LINE("%s"), msg);
 969         return 1;
 970     }
 971 
 972     // use stdin when not given a filepath, or is `-`
 973     if (argc < 2 || argv[1][0] == 0 || strcmp(argv[1], "-") == 0) {
 974         handle_input(stdin, jsonl);
 975         return 0;
 976     }
 977 
 978     const char* path = argv[1];
 979     FILE* f = fopen(path, "rb");
 980     if (f == NULL) {
 981         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 982         return 1;
 983     }
 984 
 985     handle_input(f, jsonl);
 986     fclose(f);
 987 
 988     return 0;
 989 }
 990 
 991 int main(int argc, char** argv) {
 992 #ifdef _WIN32
 993     setmode(fileno(stdin), O_BINARY);
 994     // ensure output lines end in LF instead of CRLF on windows
 995     setmode(fileno(stdout), O_BINARY);
 996     setmode(fileno(stderr), O_BINARY);
 997 #endif
 998 
 999     if (argc > 1 && is_help_option(argv[1])) {
1000         printf("%s", info);
1001         return 0;
1002     }
1003 
1004     // the dispatch table starts as all null function-pointers
1005     for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
1006         dispatch[i] = handle_invalid_token;
1007     }
1008 
1009     for (size_t i = '0'; i <= '9'; i++) {
1010         dispatch[i] = handle_number;
1011     }
1012 
1013     dispatch['n'] = handle_null;
1014     dispatch['t'] = handle_true;
1015     dispatch['f'] = handle_false;
1016     dispatch['N'] = handle_capital_none;
1017     dispatch['T'] = handle_capital_true;
1018     dispatch['F'] = handle_capital_false;
1019     dispatch['.'] = handle_dot;
1020     dispatch['+'] = handle_plus_number;
1021     dispatch['-'] = handle_minus_number;
1022     dispatch['"'] = handle_string;
1023     dispatch['\''] = handle_string;
1024     dispatch['['] = handle_array;
1025     dispatch['{'] = handle_object;
1026 
1027     // enable full/block-buffering for standard output
1028     // setvbuf(stdout, NULL, _IOFBF, 0);
1029 
1030     return run(argc, argv) == 0 ? 0 : 1;
1031 }