/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./j0 ./j0.c
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
" - ignores both rest-of-line and multi-line comments\n"
" - ignores extra/trailing commas in arrays and objects\n"
" - turns single-quoted strings/keys into double-quoted strings\n"
" - double-quotes unquoted object keys\n"
" - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
" -jsonl emit JSON Lines, when top-level value is an array\n"
"";

// j0_maker holds all the state needed to turn an input stream into JSON:
// a chunked input reader with a 1-byte lookahead, the output stream, and
// the line/position counters used in error messages
typedef struct j0_maker {
    FILE* in;
    FILE* out;

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    int current; // the byte being handled right now, or EOF
    int next;    // the byte right after the current one, or EOF
} j0_maker;

void fail(j0_maker* s, int code, const char* msg);
void handle_token(j0_maker* s);
void handle_invalid_token(j0_maker* s);

// advance_reader_pos helps func read_byte do its job: it moves past the
// byte given, keeping the line/position counters up to date
void advance_reader_pos(j0_maker* r, unsigned char b) {
    r->ipos++;
    if (b == '\n') {
        r->line++;
        r->pos = 1;
    } else {
        r->pos++;
    }
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
int read_byte(j0_maker* r) {
    if (r->ipos < r->ilen) {
        // inside current chunk
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // need to read the next block
    r->ipos = 0;
    r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
    if (r->ilen > 0) {
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // reached the end of data
    return EOF;
}

// advance is used in most of the code, instead of calling read_byte directly:
// the lookahead byte becomes the current one, and a new lookahead is read
void advance(j0_maker* r) {
    r->current = r->next;
    r->next = read_byte(r);
}

// skip_line ignores everything up to and including the next line-feed,
// or up to the end of the input
void skip_line(j0_maker* r) {
    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            break;
        }

        if (lead == '\n') {
            advance(r);
            break;
        }
    }
}

// skip_multiline_comment expects the current byte to be the `*` which
// starts the comment, and stops right after the closing `*/`, or at the
// end of the input
void skip_multiline_comment(j0_maker* r) {
    // starting from 0 keeps the opening `*` from also acting as the closing
    // one, so `/*/` doesn't count as a whole comment
    unsigned char prev = 0;

    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            break;
        }

        if (prev == '*' && lead == '/') {
            advance(r);
            break;
        }

        prev = (unsigned char)lead;
    }
}

// skip_comment handles `#` and `//` rest-of-line comments, as well as
// `/* ... */` comments; it fails on a slash which starts neither kind
void skip_comment(j0_maker* r) {
    int lead = r->current;

    if (lead == '#') {
        skip_line(r);
        return;
    }

    if (lead != '/') {
        fail(r, 1, "expected a slash to start comments");
    }

    advance(r);
    lead = r->current;

    if (lead == '/') {
        skip_line(r);
        return;
    }

    if (lead == '*') {
        skip_multiline_comment(r);
        return;
    }

    fail(r, 1, "expected `//` or `/*` to start comments");
}

// seek_token skips whitespace/control bytes and comments, stopping at the
// first byte of the next token, or at the end of the input
void seek_token(j0_maker* r) {
    while (true) {
        const int lead = r->current;

        // EOF is negative, so it must be ruled out explicitly
        if (lead != EOF && lead <= ' ') {
            advance(r);
            continue;
        }

        if (lead == '/' || lead == '#') {
            skip_comment(r);
            continue;
        }

        break;
    }
}

// starts_with_bom checks if the first n bytes given start with a UTF-8 BOM
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// restart_state gets the state ready to handle a new input stream: it
// resets all counters, fills both the current and lookahead bytes, and
// skips a leading UTF-8 BOM
void restart_state(j0_maker* s, FILE* w, FILE* r) {
    s->in = r;
    s->ilen = 0;
    s->ipos = 0;

    s->out = w;

    s->line = 1;
    s->pos = 1;

    s->current = EOF;
    s->next = EOF;

    s->current = read_byte(s);
    if (s->current == EOF) {
        return;
    }
    s->next = read_byte(s);

    // skip leading UTF-8 BOM (byte-order mark), if present
    if (starts_with_bom(s->ibuf, s->ilen)) {
        // a UTF-8 BOM has 3 bytes
        for (size_t i = 0; i < 3 && s->current != EOF; i++) {
            advance(s);
        }
    }
}

// write_bytes does as it says, minimizing the number of calls to fwrite;
// when writing fails, it quits the app, successfully if the output stream
// reports end-of-file, and with an error otherwise
void write_bytes(j0_maker* w, const unsigned char* src, size_t len) {
    if (len > 0 && fwrite(src, len, 1, w->out) < 1) {
        if (feof(w->out)) {
            exit(0);
        }

        fail(w, 1, "failed to write more output");
    }
}

// write_byte emits a single byte to the output
void write_byte(j0_maker* w, unsigned char b) {
    putc(b, w->out);
}

// https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/

// check_2_byte_rune checks if the bytes given are a well-formed 2-byte rune
bool check_2_byte_rune(int a, int b) {
    return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
}

// check_3_byte_rune checks if the bytes given are a well-formed 3-byte rune,
// which excludes overlong encodings and UTF-16 surrogates
bool check_3_byte_rune(int a, int b, int c) {
    return (
        (a == 0xe0) &&
        (0xa0 <= b && b <= 0xbf) &&
        (0x80 <= c && c <= 0xbf)
    ) || (
        (0xe1 <= a && a <= 0xec) &&
        (0x80 <= b && b <= 0xbf) &&
        (0x80 <= c && c <= 0xbf)
    ) || (
        (a == 0xed) &&
        (0x80 <= b && b <= 0x9f) &&
        (0x80 <= c && c <= 0xbf)
    ) || (
        (a == 0xee || a == 0xef) &&
        (0x80 <= b && b <= 0xbf) &&
        (0x80 <= c && c <= 0xbf)
    );
}

// check_4_byte_rune checks if the bytes given are a well-formed 4-byte rune,
// which excludes overlong encodings and values beyond U+10FFFF
bool check_4_byte_rune(int a, int b, int c, int d) {
    return (
        (a == 0xf0) &&
        (0x90 <= b && b <= 0xbf) &&
        (0x80 <= c && c <= 0xbf) &&
        (0x80 <= d && d <= 0xbf)
    ) || (
        // lead bytes 0xf1, 0xf2, and 0xf3 all allow any continuation bytes
        (0xf1 <= a && a <= 0xf3) &&
        (0x80 <= b && b <= 0xbf) &&
        (0x80 <= c && c <= 0xbf) &&
        (0x80 <= d && d <= 0xbf)
    ) || (
        // only the 2nd byte is restricted, to stay within U+10FFFF
        (a == 0xf4) &&
        (0x80 <= b && b <= 0x8f) &&
        (0x80 <= c && c <= 0xbf) &&
        (0x80 <= d && d <= 0xbf)
    );
}

// write_replacement_char is the recommended action to handle invalid bytes:
// it emits U+FFFD as UTF-8
void write_replacement_char(FILE* w) {
    putc(0xef, w);
    putc(0xbf, w);
    putc(0xbd, w);
}

// handle_invalid_rune emits a replacement char, instead of failing
void handle_invalid_rune(j0_maker* m) {
    write_replacement_char(m->out);
}

// is_continuation_byte checks if the value given is a UTF-8 continuation
// byte: EOF, being negative, never is
static bool is_continuation_byte(int b) {
    return 0x80 <= b && b <= 0xbf;
}

// copy_utf8_rune copies the rune starting at the current byte, or emits a
// replacement char when the bytes aren't well-formed UTF-8; when done, the
// current byte is the last one belonging to the rune
void copy_utf8_rune(j0_maker* m) {
    FILE* w = m->out;
    const int a = m->current;

    if (a == EOF) {
        return;
    }

    // handle 1-byte runes
    if (a < 128) {
        putc(a, w);
        return;
    }

    // the lead byte decides how many continuation bytes must follow
    size_t extra = 0;
    if (0xc2 <= a && a <= 0xdf) {
        extra = 1;
    } else if (0xe0 <= a && a <= 0xef) {
        extra = 2;
    } else if (0xf0 <= a && a <= 0xf4) {
        extra = 3;
    } else {
        // stray continuation byte, or a lead byte which is never valid
        handle_invalid_rune(m);
        return;
    }

    int rune[4] = { a, 0, 0, 0 };
    for (size_t i = 1; i <= extra; i++) {
        // peek before consuming, so bytes which can't belong to this rune,
        // such as a string's closing quote, are never swallowed
        if (!is_continuation_byte(m->next)) {
            handle_invalid_rune(m);
            return;
        }
        advance(m);
        rune[i] = m->current;
    }

    bool valid = false;
    switch (extra) {
    case 1:
        valid = check_2_byte_rune(rune[0], rune[1]);
        break;
    case 2:
        valid = check_3_byte_rune(rune[0], rune[1], rune[2]);
        break;
    default:
        valid = check_4_byte_rune(rune[0], rune[1], rune[2], rune[3]);
        break;
    }

    if (!valid) {
        handle_invalid_rune(m);
        return;
    }

    for (size_t i = 0; i <= extra; i++) {
        putc(rune[i], w);
    }
}

// debug is available to diagnose any bug found: it shows a printf-style
// message along with the current input position, and quits the app
void debug(j0_maker* s, const char* fmt, ...) {
    va_list args;
    va_start(args, fmt);

    if (s->in != stdin) {
        fclose(s->in);
    }

    write_byte(s, '\n');

    const unsigned long line = s->line;
    const unsigned long pos = s->pos;
    fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
    // a va_list can only be handed to the v-variants of the printf funcs
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\x1b[0m\n");

    va_end(args);

    exit(10);
}

// fail quits this app right after showing the error message given
void fail(j0_maker* s, int code, const char* msg) {
    const unsigned long line = s->line;
    const unsigned long pos = s->pos;

    write_byte(s, '\n');
    fprintf(stderr, "\x1b[31mline %lu, pos %lu: %s\x1b[0m\n", line, pos, msg);
    exit(code);
}

// demand_keyword checks if the input, starting from the current byte,
// matches the whole string given, advancing past all matching bytes
bool demand_keyword(j0_maker* s, const char* rest) {
    for (; rest[0] != 0; rest++) {
        const int lead = s->current;
        if (lead == EOF || lead != rest[0]) {
            return false;
        }
        advance(s);
    }

    return rest[0] == 0;
}

// handle_null handles the JSON keyword `null`
void handle_null(j0_maker* s) {
    if (!demand_keyword(s, "null")) {
        fail(s, 1, "expected `null` keyword");
    }
    write_bytes(s, (unsigned char*)"null", 4);
}

// handle_true handles the JSON keyword `true`
void handle_true(j0_maker* s) {
    if (!demand_keyword(s, "true")) {
        fail(s, 1, "expected `true` keyword");
    }
    write_bytes(s, (unsigned char*)"true", 4);
}

// handle_false handles the JSON keyword `false`
void handle_false(j0_maker* s) {
    if (!demand_keyword(s, "false")) {
        fail(s, 1, "expected `false` keyword");
    }
    write_bytes(s, (unsigned char*)"false", 5);
}

// handle_capital_none turns the keyword `None` into `null`
void handle_capital_none(j0_maker* s) {
    if (!demand_keyword(s, "None")) {
        fail(s, 1, "expected `None` keyword");
    }
    write_bytes(s, (unsigned char*)"null", 4);
}

// handle_capital_true turns the keyword `True` into `true`
void handle_capital_true(j0_maker* s) {
    if (!demand_keyword(s, "True")) {
        fail(s, 1, "expected `True` keyword");
    }
    write_bytes(s, (unsigned char*)"true", 4);
}

// handle_capital_false turns the keyword `False` into `false`
void handle_capital_false(j0_maker* s) {
    if (!demand_keyword(s, "False")) {
        fail(s, 1, "expected `False` keyword");
    }
    write_bytes(s, (unsigned char*)"false", 5);
}

// handle_digits copies a run of 1 or more decimal digits, failing when the
// current byte isn't a digit
void handle_digits(j0_maker* s) {
    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits");
    }

    while (isdigit(s->current)) {
        write_byte(s, (unsigned char)s->current);
        advance(s);
    }
}

// handle_exponent copies the optional exponent which can end a number,
// dropping a redundant plus sign; it does nothing when the current byte
// doesn't start an exponent
static void handle_exponent(j0_maker* s) {
    const int lead = s->current;
    if (lead != 'e' && lead != 'E') {
        return;
    }

    write_byte(s, (unsigned char)lead);
    advance(s);

    if (s->current == '+') {
        advance(s);
    } else if (s->current == '-') {
        write_byte(s, '-');
        advance(s);
    }

    handle_digits(s);
}

// handle_number handles unsigned numbers starting with a digit: a trailing
// decimal dot without digits after it gets a 0, to keep the output valid
void handle_number(j0_maker* s) {
    handle_digits(s);

    if (s->current == '.') {
        write_byte(s, '.');
        advance(s);

        if (isdigit(s->current)) {
            handle_digits(s);
        } else {
            write_byte(s, '0');
        }
    }

    // exponents can follow both integers and decimals
    handle_exponent(s);
}

// handle_dot handles numbers starting with a decimal dot, emitting the
// leading 0 which JSON requires
void handle_dot(j0_maker* s) {
    write_byte(s, '0');
    write_byte(s, '.');
    advance(s);

    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits after decimal dot");
    }
    handle_digits(s);
    handle_exponent(s);
}

// handle_plus_number handles numbers with a leading plus sign, which is
// dropped, since JSON doesn't allow it
void handle_plus_number(j0_maker* s) {
    advance(s);

    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// handle_minus_number handles negative numbers
void handle_minus_number(j0_maker* s) {
    write_byte(s, '-');
    advance(s);

    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// hex is only used by function handle_low_char to render hexadecimals
const char* hex = "0123456789ABCDEF";

// handle_low_char simplifies function handle_string: it escapes ASCII
// control bytes (values below 32), which JSON strings can't have as they are
void handle_low_char(j0_maker* s, int c) {
    switch (c) {
    case '\b':
        write_byte(s, '\\');
        write_byte(s, 'b');
        break;

    case '\t':
        write_byte(s, '\\');
        write_byte(s, 't');
        break;

    case '\n':
        write_byte(s, '\\');
        write_byte(s, 'n');
        break;

    case '\f':
        write_byte(s, '\\');
        write_byte(s, 'f');
        break;

    case '\r':
        write_byte(s, '\\');
        write_byte(s, 'r');
        break;

    default:
        // JSON has no short escapes for anything else, vertical tabs included
        write_byte(s, '\\');
        write_byte(s, 'u');
        write_byte(s, '0');
        write_byte(s, '0');
        write_byte(s, (unsigned char)hex[(c / 16) & 15]);
        write_byte(s, (unsigned char)hex[c % 16 & 15]);
        break;
    }
}

// copy_hex_digits copies exactly n hexadecimal digits which follow the
// current byte, failing on anything else
static void copy_hex_digits(j0_maker* s, size_t n) {
    for (size_t i = 0; i < n; i++) {
        advance(s);
        const int lead = s->current;
        if (lead == EOF) {
            fail(s, 1, "end of input before end of string");
        }
        if (!isxdigit(lead)) {
            fail(s, 1, "invalid hexadecimal digit in string");
        }
        write_byte(s, (unsigned char)lead);
    }
}

// handle_string_escape handles the byte right after a backslash in a
// string: c is that byte, which is also the current one
void handle_string_escape(j0_maker* s, int c) {
    switch (c) {
    case '"':
    case '\\':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
        write_byte(s, '\\');
        write_byte(s, (unsigned char)c);
        break;

    case 'u':
        write_byte(s, '\\');
        write_byte(s, 'u');
        copy_hex_digits(s, 4);
        break;

    case 'x':
        // JSON only has 4-hex-digit escapes
        write_byte(s, '\\');
        write_byte(s, 'u');
        write_byte(s, '0');
        write_byte(s, '0');
        copy_hex_digits(s, 2);
        break;

    case '\'':
        // single quotes need no escaping in double-quoted strings
        write_byte(s, '\'');
        break;

    default:
        // unknown escapes lose their backslash: what follows must still
        // result in valid string content
        if (c < ' ') {
            handle_low_char(s, c);
        } else {
            copy_utf8_rune(s);
        }
        break;
    }
}

// handle_string handles both single-quoted and double-quoted strings,
// always emitting double-quoted ones
void handle_string(j0_maker* s) {
    const int quote = s->current;
    bool escaped = false;

    write_byte(s, '"');

    while (true) {
        advance(s);

        const int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended before string was close-quoted");
        }

        if (escaped) {
            handle_string_escape(s, c);
            escaped = false;
            continue;
        }

        if (c == '\\') {
            escaped = true;
            continue;
        }

        if (c == quote) {
            write_byte(s, '"');
            advance(s);
            return;
        }

        if (c == '"') {
            // unescaped double-quotes can show up in single-quoted strings
            write_byte(s, '\\');
            write_byte(s, '"');
        } else if (c < ' ') {
            handle_low_char(s, c);
        } else {
            copy_utf8_rune(s);
        }
    }
}

// handle_array handles arrays, ignoring extra/trailing commas
void handle_array(j0_maker* s) {
    size_t items_before = 0;
    write_byte(s, '[');
    advance(s);

    while (true) {
        seek_token(s);
        const int lead = s->current;

        if (lead == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (lead == ',') {
            advance(s);
            continue;
        }

        if (lead == ']') {
            write_byte(s, ']');
            advance(s);
            return;
        }

        if (items_before > 0) {
            write_byte(s, ',');
        }
        handle_token(s);
        items_before++;
    }
}

// handle_array_jsonl is a slight variation of func handle_array: this one is
// used to handle top-level arrays when running in JSON Lines mode, to emit
// line-feeds after each item, instead of commas between them
void handle_array_jsonl(j0_maker* s) {
    size_t items_before = 0;
    advance(s);

    while (true) {
        seek_token(s);
        const int lead = s->current;

        if (lead == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (lead == ',') {
            advance(s);
            continue;
        }

        // end the previous item's line, even when the array is ending
        if (items_before > 0) {
            write_byte(s, '\n');
            fflush(s->out);
        }

        if (lead == ']') {
            advance(s);
            return;
        }

        handle_token(s);
        items_before++;
    }
}

// handle_unquoted_key double-quotes identifier-like object keys
void handle_unquoted_key(j0_maker* s) {
    write_byte(s, '"');

    while (true) {
        int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended with an object key");
        }

        write_byte(s, (unsigned char)c);
        advance(s);

        c = s->current;
        if (!isalpha(c) && !isdigit(c) && c != '_') {
            break;
        }
    }

    write_byte(s, '"');
}

// handle_object handles objects, ignoring extra/trailing commas: keys can
// be strings using either type of quote, or unquoted identifiers
void handle_object(j0_maker* s) {
    size_t items_before = 0;
    write_byte(s, '{');
    advance(s);

    while (true) {
        seek_token(s);
        int lead = s->current;

        if (lead == EOF) {
            fail(s, 1, "unclosed object");
        }

        if (lead == ',') {
            advance(s);
            continue;
        }

        if (lead == '}') {
            write_byte(s, '}');
            advance(s);
            return;
        }

        if (lead == '"' || lead == '\'') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_string(s);
            items_before++;
        } else if (isalpha(lead) || lead == '_') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_unquoted_key(s);
            items_before++;
        } else {
            fail(s, 1, "only strings or identifiers can be object keys");
        }

        seek_token(s);
        lead = s->current;

        if (lead == EOF) {
            fail(s, 1, "input ended after object-key and before value");
        }

        if (lead != ':') {
            fail(s, 1, "a `:` must follow all object keys");
        }

        write_byte(s, ':');
        advance(s);

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after a `:` following an object-key");
        }

        handle_token(s);
    }
}

// dispatch ties leading bytes/chars in tokens to the funcs which handle them:
// having static storage, it starts as all null function-pointers, until func
// main sets it up
void (*dispatch[256])(j0_maker*);

// handle_token handles the value starting at the current byte, which
// mustn't be whitespace or the start of a comment
void handle_token(j0_maker* s) {
    const int lead = s->current;

    if (lead == EOF) {
        // EOF is negative, so it can't be used to index the dispatch table
        fail(s, 1, "input ended before a value");
        return;
    }

    if (lead < 0 || lead > 255 || dispatch[lead] == NULL) {
        handle_invalid_token(s);
        return;
    }

    dispatch[lead](s);
}

// handle_invalid_token shows an error message and quits the app right after
void handle_invalid_token(j0_maker* s) {
    char msg[64];
    unsigned char c = (unsigned char)s->current;
    snprintf(msg, sizeof(msg), "%c (%d): invalid token", c, c);
    fail(s, 1, msg);
}

// handle_input turns the whole input stream given into JSON on the standard
// output, quitting the app on any error; when jsonl is true, each item from
// a top-level array gets its own line
void handle_input(FILE* src, bool jsonl) {
    unsigned char ibuf[32 * 1024];

    j0_maker state;
    j0_maker* s = &state;
    s->ibuf = ibuf;
    s->icap = sizeof(ibuf);
    restart_state(s, stdout, src);

    // ignore leading whitespace/comment bytes, if present
    seek_token(s);

    if (s->current == EOF) {
        fail(s, 1, "empty input isn't valid JSON");
    }

    if (jsonl && s->current == '[') {
        handle_array_jsonl(s);
    } else {
        handle_token(s);
        write_byte(s, '\n');
        fflush(s->out);
    }

    // ignore trailing whitespace/comment bytes, if present
    seek_token(s);

    // ignore trailing semicolon, if present
    if (s->current == ';') {
        advance(s);
        // ignore trailing whitespace/comment bytes, if present
        seek_token(s);
    }

    // reading errors look like the end of the input to func read_byte
    if (ferror(src)) {
        fail(s, 1, "failed to read input");
    }

    if (!feof(src) || s->current != EOF) {
        fail(s, 1, "unexpected trailing JSON data");
    }
}

// is_help_option checks if the string given is any of the help options
bool is_help_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// is_jsonl_option checks if the string given is any of the JSON Lines options
bool is_jsonl_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-jl") == 0 ||
        strcmp(s, "--jl") == 0 ||
        strcmp(s, "-jsonl") == 0 ||
        strcmp(s, "--jsonl") == 0
    );
}

// run returns the error code
int run(int argc, char** argv) {
    bool jsonl = false;
    if (argc > 1 && is_jsonl_option(argv[1])) {
        jsonl = true;
        argc--;
        argv++;
    }

    if (argc > 2) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
        return 1;
    }

    // use stdin when not given a filepath, or is `-`
    if (argc < 2 || argv[1][0] == 0 || strcmp(argv[1], "-") == 0) {
        handle_input(stdin, jsonl);
        return 0;
    }

    const char* path = argv[1];
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return 1;
    }

    handle_input(f, jsonl);
    fclose(f);

    return 0;
}

int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    // the dispatch table starts as all null function-pointers
    for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) {
        dispatch[i] = handle_invalid_token;
    }

    for (size_t i = '0'; i <= '9'; i++) {
        dispatch[i] = handle_number;
    }

    dispatch['n'] = handle_null;
    dispatch['t'] = handle_true;
    dispatch['f'] = handle_false;
    dispatch['N'] = handle_capital_none;
    dispatch['T'] = handle_capital_true;
    dispatch['F'] = handle_capital_false;
    dispatch['.'] = handle_dot;
    dispatch['+'] = handle_plus_number;
    dispatch['-'] = handle_minus_number;
    dispatch['"'] = handle_string;
    dispatch['\''] = handle_string;
    dispatch['['] = handle_array;
    dispatch['{'] = handle_object;

    // enable full/block-buffering for standard output: some C runtimes
    // reject a buffer size of 0, so an explicit size is given
    setvbuf(stdout, NULL, _IOFBF, BUFSIZ);

    return run(argc, argv) == 0 ? 0 : 1;
}