File: j0.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./j0 ./j0.c
  29 */
  30 
  31 #include <ctype.h>
  32 #include <stdarg.h>
  33 #include <stdbool.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #ifdef RED_ERRORS
  45 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  46 #ifdef __APPLE__
  47 #define ERROR_STYLE "\x1b[31m"
  48 #endif
  49 #define RESET_STYLE "\x1b[0m"
  50 #else
  51 #define ERROR_STYLE
  52 #define RESET_STYLE
  53 #endif
  54 
  55 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  56 
  57 #ifndef IBUF_SIZE
  58 #define IBUF_SIZE (32 * 1024)
  59 #endif
  60 
  61 #ifndef OBUF_SIZE
  62 #define OBUF_SIZE (8 * 1024)
  63 #endif
  64 
  65 const char* info = ""
  66 "j0 [options...] [file...]\n"
  67 "\n"
  68 "\n"
  69 "Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
  70 "Its output is always a single line, which ends with a line-feed.\n"
  71 "\n"
  72 "Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
  73 "valid JSON, since it\n"
  74 "\n"
  75 "    - ignores both rest-of-line and multi-line comments\n"
  76 "    - ignores extra/trailing commas in arrays and objects\n"
  77 "    - turns single-quoted strings/keys into double-quoted strings\n"
  78 "    - double-quotes unquoted object keys\n"
  79 "    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
  80 "\n"
  81 "All options available can either start with a single or a double-dash\n"
  82 "\n"
  83 "    -h        show this help message\n"
  84 "    -help     show this help message\n"
  85 "    -jsonl    emit JSON Lines, when top-level value is an array\n"
  86 "";
  87 
// j0_maker holds all the state used to turn one input stream into JSON
// output: the streams themselves, a buffer for each of them, the position
// used in error messages, and a 2-byte window over the input
typedef struct j0_maker {
    FILE* in;  // the stream input chunks are read from
    FILE* out; // the stream the output buffer is written to

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    unsigned char* obuf;
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    int current; // the byte being handled, or EOF
    int next;    // the byte after the current one, or EOF
} j0_maker;
 107 
 108 // advance_reader_pos helps func read_byte do its job
 109 static inline void advance_reader_pos(j0_maker* r, unsigned char b) {
 110     r->ipos++;
 111     if (b == '\n') {
 112         r->line++;
 113         r->pos = 1;
 114     } else {
 115         r->pos++;
 116     }
 117 }
 118 
 119 // read_byte does as it says: check its return for the value EOF, before
 120 // using it as the next byte
 121 static inline int read_byte(j0_maker* r) {
 122     if (r->ipos < r->ilen) {
 123         // inside current chunk
 124         const unsigned char b = r->ibuf[r->ipos];
 125         advance_reader_pos(r, b);
 126         return b;
 127     }
 128 
 129     // need to read the next block
 130     r->ipos = 0;
 131     r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
 132     if (r->ilen > 0) {
 133         const unsigned char b = r->ibuf[r->ipos];
 134         advance_reader_pos(r, b);
 135         return b;
 136     }
 137 
 138     // reached the end of data
 139     return EOF;
 140 }
 141 
 142 // advance is used in most of the code, instead of calling read_byte directly
 143 static inline void advance(j0_maker* r) {
 144     r->current = r->next;
 145     r->next = read_byte(r);
 146 }
 147 
 148 void fail(j0_maker* m, int code, const char* msg);
 149 
 150 void skip_line(j0_maker* r) {
 151     while (true) {
 152         advance(r);
 153         const int lead = r->current;
 154 
 155         if (lead == EOF) {
 156             break;
 157         }
 158 
 159         if (lead == '\n') {
 160             advance(r);
 161             break;
 162         }
 163     }
 164 }
 165 
 166 void skip_multiline_comment(j0_maker* r) {
 167     unsigned char prev = 0;
 168 
 169     while (true) {
 170         advance(r);
 171         const int lead = r->current;
 172 
 173         if (lead == EOF) {
 174             break;
 175         }
 176 
 177         if (prev == '*' && lead == '/') {
 178             advance(r);
 179             break;
 180         }
 181 
 182         prev = (unsigned char)lead;
 183     }
 184 }
 185 
 186 void skip_comment(j0_maker* r) {
 187     int lead = r->current;
 188 
 189     if (lead == '#') {
 190         skip_line(r);
 191         return;
 192     }
 193 
 194     if (lead != '/') {
 195         fail(r, 1, "expected a slash to start comments");
 196     }
 197 
 198     advance(r);
 199     lead = r->current;
 200 
 201     if (lead == '/') {
 202         skip_line(r);
 203         return;
 204     }
 205 
 206     if (lead == '*') {
 207         skip_multiline_comment(r);
 208         return;
 209     }
 210 
 211     fail(r, 1, "expected `//` or `/*` to start comments");
 212 }
 213 
 214 void seek_token(j0_maker* r) {
 215     while (true) {
 216         const int lead = r->current;
 217 
 218         if (lead != EOF && lead <= ' ') {
 219             advance(r);
 220             continue;
 221         }
 222 
 223         if (lead == '/' || lead == '#') {
 224             skip_comment(r);
 225             continue;
 226         }
 227 
 228         break;
 229     }
 230 }
 231 
 232 bool starts_with_bom(const unsigned char* b, const size_t n) {
 233     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
 234 }
 235 
 236 void restart_state(j0_maker* m, FILE* w, FILE* r) {
 237     m->in = r;
 238     m->ilen = 0;
 239     m->ipos = 0;
 240 
 241     m->out = w;
 242     m->opos = 0;
 243 
 244     m->line = 1;
 245     m->pos = 1;
 246 
 247     m->current = EOF;
 248     m->next = EOF;
 249 
 250     m->current = read_byte(m);
 251     if (m->current == EOF) {
 252         return;
 253     }
 254     m->next = read_byte(m);
 255 
 256     // skip leading UTF-8 BOM (byte-order mark), if present
 257     if (starts_with_bom(m->ibuf, m->ilen)) {
 258         // a UTF-8 BOM has 3 bytes
 259         for (size_t i = 0; i < 3 && m->current != EOF; i++) {
 260             advance(m);
 261         }
 262     }
 263 }
 264 
 265 void write_byte(j0_maker* m, unsigned char b) {
 266     if (m->opos < m->ocap) {
 267         m->obuf[m->opos++] = b;
 268         return;
 269     }
 270 
 271     fwrite(m->obuf, m->ocap, 1, m->out);
 272     m->obuf[0] = b;
 273     m->opos = 1;
 274 }
 275 
 276 // write_bytes does as it says, minimizing the number of calls to fwrite
 277 void write_bytes(j0_maker* m, const unsigned char* src, size_t len) {
 278     const size_t rem = m->ocap - m->opos;
 279     if (len < rem) {
 280         memcpy(m->obuf + m->opos, src, len);
 281         m->opos += len;
 282         return;
 283     }
 284 
 285     for (size_t i = 0; i < len; i++) {
 286         write_byte(m, src[i]);
 287     }
 288 }
 289 
 290 void flush(j0_maker* m) {
 291     if (m->opos > 0) {
 292         fwrite(m->obuf, m->opos, 1, m->out);
 293     }
 294     m->opos = 0;
 295     fflush(m->out);
 296 }
 297 
 298 // https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
 299 
 300 static inline bool check_2_byte_rune(int a, int b) {
 301     return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
 302 }
 303 
 304 bool check_3_byte_rune(int a, int b, int c) {
 305     return (
 306         (a == 0xe0) &&
 307         (0xa0 <= b && b <= 0xbf) &&
 308         (0x80 <= c && c <= 0xbf)
 309     ) || (
 310         (0xe1 <= a && a <= 0xec) &&
 311         (0x80 <= b && b <= 0xbf) &&
 312         (0x80 <= c && c <= 0xbf)
 313     ) || (
 314         (a == 0xed) &&
 315         (0x80 <= b && b <= 0x9f) &&
 316         (0x80 <= c && c <= 0xbf)
 317     ) || (
 318         (a == 0xee || a == 0xef) &&
 319         (0x80 <= b && b <= 0xbf) &&
 320         (0x80 <= c && c <= 0xbf)
 321     );
 322 }
 323 
 324 bool check_4_byte_rune(int a, int b, int c, int d) {
 325     return (
 326         (a == 0xf0) &&
 327         (0x90 <= b && b <= 0xbf) &&
 328         (0x80 <= c && c <= 0xbf) &&
 329         (0x80 <= d && d <= 0xbf)
 330     ) || (
 331         (a == 0xf1 || a == 0xf3) &&
 332         (0x80 <= b && b <= 0xbf) &&
 333         (0x80 <= c && c <= 0xbf) &&
 334         (0x80 <= d && d <= 0xbf)
 335     ) || (
 336         (a == 0xf4) &&
 337         (0x80 <= b && b <= 0xbf) &&
 338         (0x80 <= c && c <= 0x8f) &&
 339         (0x80 <= d && d <= 0xbf)
 340     );
 341 }
 342 
 343 // write_replacement_char is the recommended action to handle invalid bytes
 344 void write_replacement_char(j0_maker* m) {
 345     write_byte(m, 0xef);
 346     write_byte(m, 0xbf);
 347     write_byte(m, 0xbd);
 348 }
 349 
 350 void handle_invalid_rune(j0_maker* m) {
 351     // fail(m, 1, "invalid unicode value");
 352     write_replacement_char(m);
 353 }
 354 
 355 // write_rune is following the table at https://en.wikipedia.org/wiki/UTF-8
 356 void write_rune(j0_maker* m, uint32_t rune) {
 357     if (rune < (1 << 7)) {
 358         write_byte(m, rune);
 359         return;
 360     }
 361 
 362     if (rune < (1 << (5 + 6))) {
 363         const int a = 0b11000000 | (rune >> 6);
 364         const int b = 0b10000000 | (rune & 0b00111111);
 365         if (check_2_byte_rune(a, b)) {
 366             write_byte(m, a);
 367             write_byte(m, b);
 368         } else {
 369             write_replacement_char(m);
 370         }
 371         return;
 372     }
 373 
 374     if (rune < (1 << (4 + 6 + 6))) {
 375         const int a = 0b11100000 | (rune >> 12);
 376         const int b = 0b10000000 | ((rune >> 6) & 0b00111111);
 377         const int c = 0b10000000 | (rune & 0b00111111);
 378         if (check_3_byte_rune(a, b, c)) {
 379             write_byte(m, a);
 380             write_byte(m, b);
 381             write_byte(m, c);
 382         } else {
 383             write_replacement_char(m);
 384         }
 385         return;
 386     }
 387 
 388     if (rune < (1 << (3 + 6 + 6 + 6))) {
 389         const int a = 0b11110000 | (rune >> 18);
 390         const int b = 0b10000000 | ((rune >> 12) & 0b00111111);
 391         const int c = 0b10000000 | ((rune >> 6) & 0b00111111);
 392         const int d = 0b10000000 | (rune & 0b00111111);
 393         if (check_4_byte_rune(a, b, c, d)) {
 394             write_byte(m, a);
 395             write_byte(m, b);
 396             write_byte(m, c);
 397             write_byte(m, d);
 398         } else {
 399             write_replacement_char(m);
 400         }
 401         return;
 402     }
 403 
 404     write_replacement_char(m);
 405 }
 406 
 407 void copy_utf8_rune(j0_maker* m) {
 408     const int a = m->current;
 409 
 410     if (a == EOF) {
 411         return;
 412     }
 413 
 414     // handle 1-byte runes
 415     if (a < 128) {
 416         write_byte(m, a);
 417         return;
 418     }
 419 
 420     advance(m);
 421     const int b = m->current;
 422 
 423     if (b == EOF) {
 424         handle_invalid_rune(m);
 425         return;
 426     }
 427 
 428     // handle 2-byte runes
 429     if (check_2_byte_rune(a, b)) {
 430         write_byte(m, a);
 431         write_byte(m, b);
 432         return;
 433     }
 434 
 435     advance(m);
 436     const int c = m->current;
 437 
 438     if (c == EOF) {
 439         handle_invalid_rune(m);
 440         return;
 441     }
 442 
 443     // handle 3-byte runes
 444     if (check_3_byte_rune(a, b, c)) {
 445         write_byte(m, a);
 446         write_byte(m, b);
 447         write_byte(m, c);
 448         return;
 449     }
 450 
 451     advance(m);
 452     const int d = m->current;
 453 
 454     if (d == EOF) {
 455         handle_invalid_rune(m);
 456         return;
 457     }
 458 
 459     // handle 4-byte runes
 460     if (check_4_byte_rune(a, b, c, d)) {
 461         write_byte(m, a);
 462         write_byte(m, b);
 463         write_byte(m, c);
 464         write_byte(m, d);
 465         return;
 466     }
 467 
 468     handle_invalid_rune(m);
 469 }
 470 
 471 // debug is available to diagnose any bug found
 472 void debug(j0_maker* m, const char* fmt, ...) {
 473     va_list args;
 474     va_start(args, fmt);
 475 
 476     if (m->in != stdin) {
 477         fclose(m->in);
 478     }
 479 
 480     write_byte(m, '\n');
 481 
 482     const unsigned long line = m->line;
 483     const unsigned long pos = m->pos;
 484     fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
 485     fprintf(stderr, fmt, args);
 486     fprintf(stderr, "\x1b[0m\n");
 487 
 488     va_end(args);
 489 
 490     exit(10);
 491 }
 492 
 493 // fail quits this app right after showing the error message given
 494 void fail(j0_maker* m, int code, const char* msg) {
 495     const unsigned long line = m->line;
 496     const unsigned long pos = m->pos;
 497 
 498     write_byte(m, '\n');
 499     flush(m);
 500     fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
 501     exit(code);
 502 }
 503 
 504 bool demand_keyword(j0_maker* m, char* rest) {
 505     for (; rest[0] != 0; rest++) {
 506         const int lead = m->current;
 507         if (lead == EOF || lead != rest[0]) {
 508             return false;
 509         }
 510         advance(m);
 511     }
 512 
 513     return rest[0] == 0;
 514 }
 515 
 516 void handle_null(j0_maker* m) {
 517     if (!demand_keyword(m, "null")) {
 518         fail(m, 1, "expected `null` keyword");
 519     }
 520     write_bytes(m, (unsigned char*)"null", 4);
 521 }
 522 
 523 void handle_true(j0_maker* m) {
 524     if (!demand_keyword(m, "true")) {
 525         fail(m, 1, "expected `true` keyword");
 526     }
 527     write_bytes(m, (unsigned char*)"true", 4);
 528 }
 529 
 530 void handle_false(j0_maker* m) {
 531     if (!demand_keyword(m, "false")) {
 532         fail(m, 1, "expected `false` keyword");
 533     }
 534     write_bytes(m, (unsigned char*)"false", 5);
 535 }
 536 
 537 void handle_capital_none(j0_maker* m) {
 538     if (!demand_keyword(m, "None")) {
 539         fail(m, 1, "expected `None` keyword");
 540     }
 541     write_bytes(m, (unsigned char*)"null", 4);
 542 }
 543 
 544 void handle_capital_true(j0_maker* m) {
 545     if (!demand_keyword(m, "True")) {
 546         fail(m, 1, "expected `True` keyword");
 547     }
 548     write_bytes(m, (unsigned char*)"true", 4);
 549 }
 550 
 551 void handle_capital_false(j0_maker* m) {
 552     if (!demand_keyword(m, "False")) {
 553         fail(m, 1, "expected `False` keyword");
 554     }
 555     write_bytes(m, (unsigned char*)"false", 5);
 556 }
 557 
 558 void handle_digits(j0_maker* m) {
 559     if (!isdigit(m->current)) {
 560         fail(m, 1, "expected/missing digits");
 561     }
 562 
 563     while (isdigit(m->current)) {
 564         write_byte(m, m->current);
 565         advance(m);
 566     }
 567 }
 568 
 569 void handle_number(j0_maker* m) {
 570     handle_digits(m);
 571 
 572     const int lead = m->current;
 573 
 574     if (lead == '.') {
 575         write_byte(m, '.');
 576         advance(m);
 577 
 578         if (isdigit(m->current)) {
 579             handle_digits(m);
 580         } else {
 581             write_byte(m, '0');
 582         }
 583         return;
 584     }
 585 
 586     if (lead == 'e' || lead == 'E') {
 587         write_byte(m, lead);
 588         advance(m);
 589 
 590         if (m->current == '+') {
 591             advance(m);
 592         } else if (m->current == '-') {
 593             write_byte(m, '-');
 594             advance(m);
 595         }
 596 
 597         handle_digits(m);
 598     }
 599 }
 600 
 601 void handle_dot(j0_maker* m) {
 602     write_byte(m, '0');
 603     write_byte(m, '.');
 604     advance(m);
 605 
 606     if (!isdigit(m->current)) {
 607         fail(m, 1, "expected/missing digits after decimal dot");
 608     }
 609     handle_digits(m);
 610 }
 611 
 612 void handle_plus_number(j0_maker* m) {
 613     advance(m);
 614 
 615     if (m->current == '.') {
 616         handle_dot(m);
 617         return;
 618     }
 619     handle_number(m);
 620 }
 621 
 622 void handle_minus_number(j0_maker* m) {
 623     write_byte(m, '-');
 624     advance(m);
 625 
 626     if (m->current == '.') {
 627         handle_dot(m);
 628         return;
 629     }
 630     handle_number(m);
 631 }
 632 
 633 // decode_hex assumes valid hex digits, checked by func is_valid_hex
 634 uint32_t decode_hex(unsigned char hex) {
 635     if ('0' <= hex && hex <= '9') {
 636         return hex - '0';
 637     }
 638     if ('A' <= hex && hex <= 'F') {
 639         return hex - 'A' + 10;
 640     }
 641     if ('a' <= hex && hex <= 'f') {
 642         return hex - 'a' + 10;
 643     }
 644     return 0xffff;
 645 }
 646 
 647 static inline bool is_valid_hex(unsigned char b) {
 648     return false ||
 649         ('0' <= b && b <= '9') ||
 650         ('A' <= b && b <= 'F') ||
 651         ('a' <= b && b <= 'f');
 652 }
 653 
 654 void handle_hex_quad(j0_maker* m) {
 655     unsigned char quad[4];
 656     for (size_t i = 0; i < 4; i++) {
 657         advance(m);
 658         const int lead = m->current;
 659         if (lead == EOF) {
 660             fail(m, 1, "end of input before end of string");
 661         }
 662         if (is_valid_hex(lead)) {
 663             quad[i] = lead;
 664             continue;
 665         }
 666         fail(m, 1, "invalid hexadecimal digit in string");
 667     }
 668 
 669     const uint32_t n = 0 +
 670         (decode_hex(quad[0]) << 12) +
 671         (decode_hex(quad[1]) << 8) +
 672         (decode_hex(quad[2]) << 4) +
 673         (decode_hex(quad[3]) << 0);
 674 
 675     if (n >= 32) {
 676         write_rune(m, n);
 677         return;
 678     }
 679 
 680     write_byte(m, '\\');
 681     write_byte(m, 'u');
 682     write_byte(m, quad[0]);
 683     write_byte(m, quad[1]);
 684     write_byte(m, quad[2]);
 685     write_byte(m, quad[3]);
 686 }
 687 
 688 void handle_hex_pair(j0_maker* m) {
 689     advance(m);
 690     const int a = m->current;
 691     advance(m);
 692     const int b = m->current;
 693     if (a == EOF || b == EOF) {
 694         fail(m, 1, "end of input before end of string");
 695     }
 696     if (!is_valid_hex(a) || !is_valid_hex(b)) {
 697         fail(m, 1, "invalid hexadecimal digit in string");
 698     }
 699 
 700     const uint32_t n = 16 * decode_hex(a) + decode_hex(b);
 701     if (n >= 32) {
 702         write_rune(m, n);
 703     } else {
 704         write_byte(m, '\\');
 705         write_byte(m, 'u');
 706         write_byte(m, '0');
 707         write_byte(m, '0');
 708         write_byte(m, a);
 709         write_byte(m, b);
 710     }
 711 }
 712 
 713 void handle_string_escape(j0_maker* m, int c) {
 714     switch (c) {
 715         case '"':
 716         case '\\':
 717         case 'b':
 718         case 'f':
 719         case 'n':
 720         case 'r':
 721         case 't':
 722             write_byte(m, '\\');
 723             write_byte(m, c);
 724             break;
 725 
 726         case 'u':
 727             handle_hex_quad(m);
 728             break;
 729 
 730         case 'x':
 731             handle_hex_pair(m);
 732             break;
 733 
 734         case '\'':
 735             write_byte(m, '\'');
 736             break;
 737 
 738         default:
 739             write_byte(m, m->current);
 740             break;
 741     }
 742 }
 743 
 744 // hex is only used by function handle_low_char to render hexadecimals
 745 const char* hex = "0123456789ABCDEF";
 746 
 747 // handle_low_char simplifies function handle_string
 748 void handle_low_char(j0_maker* m, int c) {
 749     switch (c) {
 750         case '\t':
 751             write_byte(m, '\\');
 752             write_byte(m, 't');
 753             break;
 754 
 755         case '\n':
 756             write_byte(m, '\\');
 757             write_byte(m, 'n');
 758             break;
 759 
 760         case '\r':
 761             write_byte(m, '\\');
 762             write_byte(m, 'r');
 763             break;
 764 
 765         case '\v':
 766             write_byte(m, '\\');
 767             write_byte(m, 'v');
 768             break;
 769 
 770         default:
 771             write_byte(m, '\\');
 772             write_byte(m, 'u');
 773             write_byte(m, '0');
 774             write_byte(m, '0');
 775             write_byte(m, hex[c / 16]);
 776             write_byte(m, hex[c % 16]);
 777             break;
 778     }
 779 }
 780 
 781 void handle_string(j0_maker* m) {
 782     const unsigned char quote = m->current;
 783     bool escaped = false;
 784 
 785     write_byte(m, '"');
 786 
 787     while (true) {
 788         advance(m);
 789 
 790         int c = m->current;
 791         if (c == EOF) {
 792             fail(m, 1, "input ended before string was close-quoted");
 793         }
 794 
 795         if (escaped) {
 796             handle_string_escape(m, c);
 797             escaped = false;
 798             continue;
 799         }
 800 
 801         switch (c) {
 802             case '\\':
 803                 escaped = true;
 804                 break;
 805 
 806             default:
 807                 if (c == quote) {
 808                     write_byte(m, '"');
 809                     advance(m);
 810                     return;
 811                 }
 812 
 813                 // write_byte(m, c);
 814                 if (c < ' ') {
 815                     handle_low_char(m, c);
 816                 } else {
 817                     copy_utf8_rune(m);
 818                 }
 819                 break;
 820         }
 821     }
 822 }
 823 
 824 void handle_token(j0_maker* m);
 825 
 826 void handle_array(j0_maker* m) {
 827     write_byte(m, '[');
 828     advance(m);
 829 
 830     for (size_t i = 0; true; i++) {
 831         seek_token(m);
 832         const int lead = m->current;
 833 
 834         if (lead == EOF) {
 835             fail(m, 1, "unclosed array");
 836         }
 837 
 838         if (lead == ',') {
 839             advance(m);
 840             continue;
 841         }
 842 
 843         if (lead == ']') {
 844             write_byte(m, ']');
 845             advance(m);
 846             return;
 847         }
 848 
 849         if (i > 0) {
 850             write_byte(m, ',');
 851         }
 852         if (feof(m->out)) {
 853             return;
 854         }
 855         handle_token(m);
 856     }
 857 }
 858 
 859 // handle_array_jsonl is a slight variation of func handle_array: this one is
 860 // used to handle top-level arrays when running in JSON Lines mode, to emit
 861 // line-feeds after each item, instead of commas between them
 862 void handle_array_jsonl(j0_maker* m) {
 863     advance(m);
 864 
 865     for (size_t i = 0; true; i++) {
 866         seek_token(m);
 867         const int lead = m->current;
 868 
 869         if (lead == EOF) {
 870             fail(m, 1, "unclosed array");
 871         }
 872 
 873         if (lead == ',') {
 874             advance(m);
 875             continue;
 876         }
 877 
 878         if (i > 0) {
 879             write_byte(m, '\n');
 880         }
 881 
 882         if (lead == ']') {
 883             advance(m);
 884             return;
 885         }
 886 
 887         if (feof(m->out)) {
 888             return;
 889         }
 890         handle_token(m);
 891     }
 892 }
 893 
 894 void handle_unquoted_key(j0_maker* m) {
 895     write_byte(m, '"');
 896 
 897     while (true) {
 898         int c = m->current;
 899         if (c == EOF) {
 900             fail(m, 1, "input ended with an object key");
 901         }
 902 
 903         write_byte(m, c);
 904         advance(m);
 905 
 906         c = m->current;
 907         if (!isalpha(c) && !isdigit(c) && c != '_') {
 908             break;
 909         }
 910     }
 911 
 912     write_byte(m, '"');
 913 }
 914 
 915 void handle_object(j0_maker* m) {
 916     write_byte(m, '{');
 917     advance(m);
 918 
 919     for (size_t i = 0; true; i++) {
 920         seek_token(m);
 921         int lead = m->current;
 922 
 923         if (lead == EOF) {
 924             fail(m, 1, "unclosed object");
 925         }
 926 
 927         if (lead == ',') {
 928             advance(m);
 929             continue;
 930         }
 931 
 932         if (lead == '}') {
 933             write_byte(m, '}');
 934             advance(m);
 935             return;
 936         }
 937 
 938         if (feof(m->out)) {
 939             return;
 940         }
 941 
 942         if (lead == '"' || lead == '\'') {
 943             if (i > 0) {
 944                 write_byte(m, ',');
 945             }
 946             handle_string(m);
 947         } else if (isalpha(lead) || lead == '_') {
 948             if (i > 0) {
 949                 write_byte(m, ',');
 950             }
 951             handle_unquoted_key(m);
 952         } else {
 953             fail(m, 1, "only strings or identifiers can be object keys");
 954         }
 955 
 956         seek_token(m);
 957         lead = m->current;
 958 
 959         if (lead == EOF) {
 960             fail(m, 1, "input ended after object-key and before value");
 961         }
 962 
 963         if (lead != ':') {
 964             fail(m, 1, "a `:` must follow all object keys");
 965         }
 966 
 967         write_byte(m, ':');
 968         advance(m);
 969 
 970         seek_token(m);
 971         if (m->current == EOF) {
 972             fail(m, 1, "input ended after a `:` following an object-key");
 973         }
 974 
 975         handle_token(m);
 976     }
 977 }
 978 
 979 // dispatch ties leading bytes/chars in tokens to the funcs which handle them
 980 void (*dispatch[256])() = {
 981     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 982     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 983     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 984     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 985     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 986     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 987     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 988     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 989     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 990     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 991     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 992     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 993     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 994     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 995     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 996     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 997     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 998     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 999     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1000     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1001     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1002     NULL, NULL, NULL, NULL,
1003 };
1004 
1005 void handle_token(j0_maker* m) {
1006     dispatch[m->current](m);
1007 }
1008 
1009 // handle_invalid_token shows an error message and quits the app right after
1010 void handle_invalid_token(j0_maker* m) {
1011     char msg[64];
1012     unsigned char c = (unsigned char)m->current;
1013     sprintf(msg, "%c (%d): invalid token", c, c);
1014     fail(m, 1, msg);
1015 }
1016 
1017 void handle_array_jsonl(j0_maker* m);
1018 
1019 void handle_input(FILE* src, bool jsonl) {
1020     unsigned char ibuf[IBUF_SIZE];
1021     unsigned char obuf[OBUF_SIZE];
1022 
1023     j0_maker m;
1024     m.ibuf = ibuf;
1025     m.icap = sizeof(ibuf);
1026     m.obuf = obuf;
1027     m.ocap = sizeof(obuf);
1028     restart_state(&m, stdout, src);
1029 
1030     // ignore leading whitespace/comment bytes, if present
1031     seek_token(&m);
1032 
1033     if (m.current == EOF) {
1034         fail(&m, 1, "empty input isn't valid JSON");
1035     }
1036 
1037     if (jsonl && m.current == '[') {
1038         handle_array_jsonl(&m);
1039     } else {
1040         handle_token(&m);
1041         write_byte(&m, '\n');
1042     }
1043     flush(&m);
1044 
1045     // ignore trailing whitespace/comment bytes, if present
1046     seek_token(&m);
1047 
1048     // ignore trailing semicolon, if present
1049     if (m.current == ';') {
1050         advance(&m);
1051         // ignore trailing whitespace/comment bytes, if present
1052         seek_token(&m);
1053     }
1054 
1055     if (!feof(src) || m.current != EOF) {
1056         fail(&m, 1, "unexpected trailing JSON data");
1057     }
1058 }
1059 
// is_help_option checks if the string given is one of the options asking
// for the help message, which can start with either 1 or 2 dashes
bool is_help_option(const char* s) {
    static const char* const names[] = { "-h", "--h", "-help", "--help" };

    if (s[0] != '-' || s[1] == 0) {
        return false;
    }

    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (strcmp(s, names[i]) == 0) {
            return true;
        }
    }
    return false;
}
1068 
// is_jsonl_option checks if the string given is one of the options asking
// for JSON Lines output, which can start with either 1 or 2 dashes
bool is_jsonl_option(const char* s) {
    static const char* const names[] = { "-jl", "--jl", "-jsonl", "--jsonl" };

    if (s[0] != '-' || s[1] == 0) {
        return false;
    }

    for (size_t i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (strcmp(s, names[i]) == 0) {
            return true;
        }
    }
    return false;
}
1077 
// run handles the command-line arguments given, which must exclude the
// app's name, and converts the input picked by them: an optional JSON Lines
// option can come first, followed by an optional `--`, followed by at most
// 1 filepath; the standard input is used when there's no filepath, or when
// it's either empty or a single dash. It returns the error code, which is 0
// on success.
int run(int nargs, char** args) {
    const bool jsonl = (nargs > 0) && is_jsonl_option(args[0]);
    if (jsonl) {
        args++;
        nargs--;
    }

    if (nargs > 0 && strcmp(args[0], "--") == 0) {
        args++;
        nargs--;
    }

    if (nargs > 1) {
        fprintf(stderr, ERROR_LINE("%s"), "can't use more than 1 named input");
        return 1;
    }

    // use stdin when not given a filepath
    const char* path = (nargs == 0) ? "-" : args[0];
    if (path[0] == 0 || strcmp(path, "-") == 0) {
        handle_input(stdin, jsonl);
        return 0;
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return 1;
    }

    handle_input(f, jsonl);
    fclose(f);
    return 0;
}
1116 
1117 int main(int argc, char** argv) {
1118 #ifdef _WIN32
1119     setmode(fileno(stdin), O_BINARY);
1120     // ensure output lines end in LF instead of CRLF on windows
1121     setmode(fileno(stdout), O_BINARY);
1122     setmode(fileno(stderr), O_BINARY);
1123 #endif
1124 
1125     if (argc > 1 && is_help_option(argv[1])) {
1126         printf("%s", info);
1127         return 0;
1128     }
1129 
1130     // the dispatch table starts as all null function-pointers
1131     for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
1132         dispatch[i] = handle_invalid_token;
1133     }
1134 
1135     for (size_t i = '0'; i <= '9'; i++) {
1136         dispatch[i] = handle_number;
1137     }
1138 
1139     dispatch['n'] = handle_null;
1140     dispatch['t'] = handle_true;
1141     dispatch['f'] = handle_false;
1142     dispatch['N'] = handle_capital_none;
1143     dispatch['T'] = handle_capital_true;
1144     dispatch['F'] = handle_capital_false;
1145     dispatch['.'] = handle_dot;
1146     dispatch['+'] = handle_plus_number;
1147     dispatch['-'] = handle_minus_number;
1148     dispatch['"'] = handle_string;
1149     dispatch['\''] = handle_string;
1150     dispatch['['] = handle_array;
1151     dispatch['{'] = handle_object;
1152 
1153     // enable full/block-buffering for standard output
1154     setvbuf(stdout, NULL, _IOFBF, 0);
1155 
1156     return run(argc - 1, argv + 1) == 0 ? 0 : 1;
1157 }