/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./j0 ./j0.c
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// error messages are optionally styled in red: each macro is defined exactly
// once per configuration, so no build ever sees a conflicting redefinition
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// info is the help message shown by the help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
" - ignores both rest-of-line and multi-line comments\n"
" - ignores extra/trailing commas in arrays and objects\n"
" - turns single-quoted strings/keys into double-quoted strings\n"
" - double-quotes unquoted object keys\n"
" - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
" -jsonl emit JSON Lines, when top-level value is an array\n"
"";

// j0_maker holds all the state needed to turn 1 input into minimal JSON:
// both buffers are supplied by the caller, and are never (re)allocated here
typedef struct j0_maker {
    FILE* in;
    FILE* out;

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    unsigned char* obuf;
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    int current; // the byte being handled, or EOF
    int next;    // 1-byte lookahead, or EOF
} j0_maker;

// advance_reader_pos helps func read_byte do its job
static inline void advance_reader_pos(j0_maker* r, unsigned char b) {
    r->ipos++;
    if (b == '\n') {
        r->line++;
        r->pos = 1;
    } else {
        r->pos++;
    }
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
static inline int read_byte(j0_maker* r) {
    if (r->ipos >= r->ilen) {
        // need to read the next block
        r->ipos = 0;
        r->ilen = 0;

        // don't read again from streams which already ended/failed
        if (feof(r->in) || ferror(r->in)) {
            return EOF;
        }

        r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
        if (r->ilen == 0) {
            // reached the end of data
            return EOF;
        }
    }

    const unsigned char b = r->ibuf[r->ipos];
    advance_reader_pos(r, b);
    return b;
}

// advance is used in most of the code, instead of calling read_byte directly
static inline void advance(j0_maker* r) {
    r->current = r->next;
    r->next = read_byte(r);
}

void fail(j0_maker* m, int code, const char* msg);

// skip_line ignores all bytes up to and including the next line-feed
void skip_line(j0_maker* r) {
    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            break;
        }

        if (lead == '\n') {
            advance(r);
            break;
        }
    }
}

// skip_multiline_comment expects the current byte to be the `*` from the
// opening `/*`, and ignores everything up to and including the closing `*/`;
// comments left unclosed are silently ended by the end of the input
void skip_multiline_comment(j0_maker* r) {
    unsigned char prev = 0;

    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            break;
        }

        if (prev == '*' && lead == '/') {
            advance(r);
            break;
        }

        prev = (unsigned char)lead;
    }
}

// skip_comment handles `#...`, `//...`, and `/*...*/` comments
void skip_comment(j0_maker* r) {
    int lead = r->current;

    if (lead == '#') {
        skip_line(r);
        return;
    }

    if (lead != '/') {
        fail(r, 1, "expected a slash to start comments");
    }

    advance(r);
    lead = r->current;

    if (lead == '/') {
        skip_line(r);
        return;
    }

    if (lead == '*') {
        skip_multiline_comment(r);
        return;
    }

    fail(r, 1, "expected `//` or `/*` to start comments");
}

// seek_token skips whitespace/control bytes and comments, stopping at the
// first byte of the next token, or at the end of the input
void seek_token(j0_maker* r) {
    while (true) {
        const int lead = r->current;

        if (lead != EOF && lead <= ' ') {
            advance(r);
            continue;
        }

        if (lead == '/' || lead == '#') {
            skip_comment(r);
            continue;
        }

        break;
    }
}

// starts_with_bom checks if the bytes given start with a UTF-8 byte-order mark
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// restart_state (re)starts everything except the buffers and their sizes,
// which the caller must set before calling this; it also primes the
// current/next pair of bytes, and skips a leading UTF-8 BOM
void restart_state(j0_maker* m, FILE* w, FILE* r) {
    m->in = r;
    m->ilen = 0;
    m->ipos = 0;

    m->out = w;
    m->opos = 0;

    m->line = 1;
    m->pos = 1;

    m->current = EOF;
    m->next = EOF;

    m->current = read_byte(m);
    if (m->current == EOF) {
        return;
    }
    m->next = read_byte(m);

    // skip leading UTF-8 BOM (byte-order mark), if present
    if (starts_with_bom(m->ibuf, m->ilen)) {
        // a UTF-8 BOM has 3 bytes
        for (size_t i = 0; i < 3 && m->current != EOF; i++) {
            advance(m);
        }
    }
}

// write_byte buffers the byte given, emitting the whole buffer when it's full
void write_byte(j0_maker* m, unsigned char b) {
    if (m->opos < m->ocap) {
        m->obuf[m->opos++] = b;
        return;
    }

    fwrite(m->obuf, m->ocap, 1, m->out);
    m->obuf[0] = b;
    m->opos = 1;
}

// write_bytes does as it says, going through the output buffer
void write_bytes(j0_maker* m, const unsigned char* src, size_t len) {
    for (size_t i = 0; i < len; i++) {
        write_byte(m, src[i]);
    }
}

// flush emits all pending output bytes
void flush(j0_maker* m) {
    if (m->opos > 0) {
        fwrite(m->obuf, m->opos, 1, m->out);
    }
    m->opos = 0;
    fflush(m->out);
}

// https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/

// check_2_byte_rune validates 2-byte sequences (U+0080..U+07FF)
bool check_2_byte_rune(int a, int b) {
    return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
}

// check_3_byte_rune validates 3-byte sequences, rejecting overlong forms
// (e0 followed by 80..9f) and UTF-16 surrogates (ed followed by a0..bf)
bool check_3_byte_rune(int a, int b, int c) {
    if (c < 0x80 || c > 0xbf) {
        return false;
    }

    if (a == 0xe0) {
        return 0xa0 <= b && b <= 0xbf;
    }
    if (a == 0xed) {
        return 0x80 <= b && b <= 0x9f;
    }
    if ((0xe1 <= a && a <= 0xec) || a == 0xee || a == 0xef) {
        return 0x80 <= b && b <= 0xbf;
    }
    return false;
}

// check_4_byte_rune validates 4-byte sequences, rejecting overlong forms
// (f0 followed by 80..8f) and values past U+10FFFF (f4 followed by 90..bf)
bool check_4_byte_rune(int a, int b, int c, int d) {
    if (c < 0x80 || c > 0xbf || d < 0x80 || d > 0xbf) {
        return false;
    }

    if (a == 0xf0) {
        return 0x90 <= b && b <= 0xbf;
    }
    if (0xf1 <= a && a <= 0xf3) {
        return 0x80 <= b && b <= 0xbf;
    }
    if (a == 0xf4) {
        return 0x80 <= b && b <= 0x8f;
    }
    return false;
}

// write_replacement_char is the recommended action to handle invalid bytes
void write_replacement_char(j0_maker* m) {
    write_byte(m, 0xef);
    write_byte(m, 0xbf);
    write_byte(m, 0xbd);
}

void handle_invalid_rune(j0_maker* m) {
    // fail(m, 1, "invalid unicode value");
    write_replacement_char(m);
}

// copy_utf8_rune copies the rune starting at the current byte, leaving the
// current byte at the rune's last byte; invalid sequences are replaced by
// U+FFFD. Continuation bytes are checked using the 1-byte lookahead before
// being consumed, so a stray lead byte can't swallow the bytes after it,
// such as the quote which ends a string.
void copy_utf8_rune(j0_maker* m) {
    const int lead = m->current;

    if (lead == EOF) {
        return;
    }

    // handle 1-byte runes
    if (lead < 128) {
        write_byte(m, lead);
        return;
    }

    size_t len = 0;
    if (0xc2 <= lead && lead <= 0xdf) {
        len = 2;
    } else if (0xe0 <= lead && lead <= 0xef) {
        len = 3;
    } else if (0xf0 <= lead && lead <= 0xf4) {
        len = 4;
    } else {
        // stray continuation byte, or a byte which can't start any rune
        handle_invalid_rune(m);
        return;
    }

    int rune[4] = { lead, 0, 0, 0 };
    for (size_t i = 1; i < len; i++) {
        // EOF is negative, so it also fails this range check
        if (m->next < 0x80 || m->next > 0xbf) {
            handle_invalid_rune(m);
            return;
        }
        advance(m);
        rune[i] = m->current;
    }

    bool valid = false;
    switch (len) {
    case 2:
        valid = check_2_byte_rune(rune[0], rune[1]);
        break;
    case 3:
        valid = check_3_byte_rune(rune[0], rune[1], rune[2]);
        break;
    default:
        valid = check_4_byte_rune(rune[0], rune[1], rune[2], rune[3]);
        break;
    }

    if (!valid) {
        handle_invalid_rune(m);
        return;
    }

    for (size_t i = 0; i < len; i++) {
        write_byte(m, rune[i]);
    }
}

// debug is available to diagnose any bug found: it shows the printf-style
// message given, along with the current input position, then quits the app
void debug(j0_maker* m, const char* fmt, ...) {
    const unsigned long line = m->line;
    const unsigned long pos = m->pos;

    // end the output line so far, and make sure it shows before the message
    write_byte(m, '\n');
    flush(m);

    if (m->in != stdin) {
        fclose(m->in);
    }

    va_list args;
    va_start(args, fmt);
    fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
    // a va_list can only be formatted by the v-variants of printf
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\x1b[0m\n");
    va_end(args);

    exit(10);
}

// fail quits this app right after showing the error message given
void fail(j0_maker* m, int code, const char* msg) {
    const unsigned long line = m->line;
    const unsigned long pos = m->pos;

    write_byte(m, '\n');
    flush(m);
    fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
    exit(code);
}

// demand_keyword checks the input has all the bytes given, starting from the
// current one, consuming all matching bytes along the way
bool demand_keyword(j0_maker* m, const char* rest) {
    for (; rest[0] != 0; rest++) {
        const int lead = m->current;
        if (lead == EOF || lead != rest[0]) {
            return false;
        }
        advance(m);
    }

    return true;
}

// expect_keyword simplifies all the keyword-handling funcs: it fails with
// the message given when the keyword isn't there, and emits the JSON
// equivalent of the keyword otherwise
static void expect_keyword(
    j0_maker* m, const char* keyword, const char* json, const char* msg
) {
    if (!demand_keyword(m, keyword)) {
        fail(m, 1, msg);
    }
    write_bytes(m, (const unsigned char*)json, strlen(json));
}

void handle_null(j0_maker* m) {
    expect_keyword(m, "null", "null", "expected `null` keyword");
}

void handle_true(j0_maker* m) {
    expect_keyword(m, "true", "true", "expected `true` keyword");
}

void handle_false(j0_maker* m) {
    expect_keyword(m, "false", "false", "expected `false` keyword");
}

// handle_capital_none turns python-style `None` values into JSON nulls
void handle_capital_none(j0_maker* m) {
    expect_keyword(m, "None", "null", "expected `None` keyword");
}

// handle_capital_true turns python-style `True` values into JSON trues
void handle_capital_true(j0_maker* m) {
    expect_keyword(m, "True", "true", "expected `True` keyword");
}

// handle_capital_false turns python-style `False` values into JSON falses
void handle_capital_false(j0_maker* m) {
    expect_keyword(m, "False", "false", "expected `False` keyword");
}

// handle_digits copies a run of 1 or more decimal digits
void handle_digits(j0_maker* m) {
    if (!isdigit(m->current)) {
        fail(m, 1, "expected/missing digits");
    }

    while (isdigit(m->current)) {
        write_byte(m, m->current);
        advance(m);
    }
}

// handle_exponent copies the optional exponent ending a number: explicit
// plus signs are dropped, since they're redundant
static void handle_exponent(j0_maker* m) {
    const int lead = m->current;
    if (lead != 'e' && lead != 'E') {
        return;
    }

    write_byte(m, lead);
    advance(m);

    if (m->current == '+') {
        advance(m);
    } else if (m->current == '-') {
        write_byte(m, '-');
        advance(m);
    }

    handle_digits(m);
}

// handle_number handles unsigned numbers starting with a digit: a trailing
// dot gets a 0 after it, and exponents can follow either the integer part
// or the fraction
void handle_number(j0_maker* m) {
    handle_digits(m);

    if (m->current == '.') {
        write_byte(m, '.');
        advance(m);

        if (isdigit(m->current)) {
            handle_digits(m);
        } else {
            write_byte(m, '0');
        }
    }

    handle_exponent(m);
}

// handle_dot handles numbers starting with a decimal dot, which need a 0
// before the dot to be valid JSON
void handle_dot(j0_maker* m) {
    write_byte(m, '0');
    write_byte(m, '.');
    advance(m);

    if (!isdigit(m->current)) {
        fail(m, 1, "expected/missing digits after decimal dot");
    }
    handle_digits(m);
    handle_exponent(m);
}

// handle_plus_number drops leading plus signs, which JSON doesn't allow
void handle_plus_number(j0_maker* m) {
    advance(m);

    if (m->current == '.') {
        handle_dot(m);
        return;
    }
    handle_number(m);
}

void handle_minus_number(j0_maker* m) {
    write_byte(m, '-');
    advance(m);

    if (m->current == '.') {
        handle_dot(m);
        return;
    }
    handle_number(m);
}

// hex is only used by function handle_low_char to render hexadecimals
const char* hex = "0123456789ABCDEF";

// handle_low_char escapes the control byte (0..31) given, since JSON strings
// can't have those unescaped: vertical tabs use the generic \u form, as JSON
// has no \v escape
void handle_low_char(j0_maker* m, int c) {
    char shortcut = 0;

    switch (c) {
    case '\b':
        shortcut = 'b';
        break;
    case '\f':
        shortcut = 'f';
        break;
    case '\n':
        shortcut = 'n';
        break;
    case '\r':
        shortcut = 'r';
        break;
    case '\t':
        shortcut = 't';
        break;
    default:
        break;
    }

    write_byte(m, '\\');
    if (shortcut != 0) {
        write_byte(m, shortcut);
        return;
    }

    write_byte(m, 'u');
    write_byte(m, '0');
    write_byte(m, '0');
    write_byte(m, hex[(c / 16) % 16]);
    write_byte(m, hex[c % 16]);
}

// copy_hex_digits copies the exact number of hexadecimal digits given,
// starting right after the current byte
static void copy_hex_digits(j0_maker* m, size_t count) {
    for (size_t i = 0; i < count; i++) {
        advance(m);
        const int digit = m->current;
        if (digit == EOF) {
            fail(m, 1, "end of input before end of string");
        }
        if (!isxdigit(digit)) {
            fail(m, 1, "invalid hexadecimal digit in string");
        }
        write_byte(m, digit);
    }
}

// handle_string_escape handles the byte right after a backslash in strings:
// param c is that byte, which is also the current byte
void handle_string_escape(j0_maker* m, int c) {
    switch (c) {
    case '"':
    case '\\':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
        write_byte(m, '\\');
        write_byte(m, c);
        break;

    case 'u':
        write_byte(m, '\\');
        write_byte(m, 'u');
        copy_hex_digits(m, 4);
        break;

    case 'x':
        // JSON has no \x escapes: use the equivalent \u00 ones instead
        write_byte(m, '\\');
        write_byte(m, 'u');
        write_byte(m, '0');
        write_byte(m, '0');
        copy_hex_digits(m, 2);
        break;

    case '\'':
        // single quotes need no escaping in double-quoted strings
        write_byte(m, '\'');
        break;

    default:
        // unknown escapes stand for the escaped symbol itself: control
        // bytes still need escaping, and multi-byte runes copying whole
        if (0 <= c && c < ' ') {
            handle_low_char(m, c);
        } else {
            copy_utf8_rune(m);
        }
        break;
    }
}

// handle_string turns single/double-quoted strings into double-quoted ones
void handle_string(j0_maker* m) {
    const unsigned char quote = m->current;
    bool escaped = false;

    write_byte(m, '"');

    while (true) {
        advance(m);

        const int c = m->current;
        if (c == EOF) {
            fail(m, 1, "input ended before string was close-quoted");
        }

        if (escaped) {
            handle_string_escape(m, c);
            escaped = false;
            continue;
        }

        if (c == '\\') {
            escaped = true;
            continue;
        }

        if (c == quote) {
            write_byte(m, '"');
            advance(m);
            return;
        }

        if (c == '"') {
            // bare double-quotes inside single-quoted strings need escaping
            write_byte(m, '\\');
            write_byte(m, '"');
        } else if (c < ' ') {
            handle_low_char(m, c);
        } else {
            copy_utf8_rune(m);
        }
    }
}

void handle_token(j0_maker* m);

// handle_array ignores extra/trailing commas between items
void handle_array(j0_maker* m) {
    size_t items_before = 0;
    write_byte(m, '[');
    advance(m);

    while (true) {
        seek_token(m);
        const int lead = m->current;

        if (lead == EOF) {
            fail(m, 1, "unclosed array");
        }

        if (lead == ',') {
            advance(m);
            continue;
        }

        if (lead == ']') {
            write_byte(m, ']');
            advance(m);
            return;
        }

        if (items_before > 0) {
            write_byte(m, ',');
        }
        handle_token(m);
        items_before++;
    }
}

// handle_array_jsonl is a slight variation of func handle_array: this one is
// used to handle top-level arrays when running in JSON Lines mode, to emit
// line-feeds after each item, instead of commas between them
void handle_array_jsonl(j0_maker* m) {
    size_t items_before = 0;
    advance(m);

    while (true) {
        seek_token(m);
        const int lead = m->current;

        if (lead == EOF) {
            fail(m, 1, "unclosed array");
        }

        if (lead == ',') {
            advance(m);
            continue;
        }

        // this also ends the last line, when the array is about to end
        if (items_before > 0) {
            write_byte(m, '\n');
        }

        if (lead == ']') {
            advance(m);
            return;
        }

        handle_token(m);
        items_before++;
    }
}

// handle_unquoted_key double-quotes identifier-like object keys: callers
// are expected to have checked the key's first byte
void handle_unquoted_key(j0_maker* m) {
    write_byte(m, '"');

    while (true) {
        int c = m->current;
        if (c == EOF) {
            fail(m, 1, "input ended with an object key");
        }

        write_byte(m, c);
        advance(m);

        c = m->current;
        if (!isalpha(c) && !isdigit(c) && c != '_') {
            break;
        }
    }

    write_byte(m, '"');
}

// handle_object ignores extra/trailing commas between key-value pairs
void handle_object(j0_maker* m) {
    size_t items_before = 0;
    write_byte(m, '{');
    advance(m);

    while (true) {
        seek_token(m);
        int lead = m->current;

        if (lead == EOF) {
            fail(m, 1, "unclosed object");
        }

        if (lead == ',') {
            advance(m);
            continue;
        }

        if (lead == '}') {
            write_byte(m, '}');
            advance(m);
            return;
        }

        const bool quoted = lead == '"' || lead == '\'';
        if (!quoted && !isalpha(lead) && lead != '_') {
            fail(m, 1, "only strings or identifiers can be object keys");
        }

        if (items_before > 0) {
            write_byte(m, ',');
        }
        if (quoted) {
            handle_string(m);
        } else {
            handle_unquoted_key(m);
        }
        items_before++;

        seek_token(m);
        lead = m->current;

        if (lead == EOF) {
            fail(m, 1, "input ended after object-key and before value");
        }

        if (lead != ':') {
            fail(m, 1, "a `:` must follow all object keys");
        }

        write_byte(m, ':');
        advance(m);

        seek_token(m);
        if (m->current == EOF) {
            fail(m, 1, "input ended after a `:` following an object-key");
        }

        handle_token(m);
    }
}

// handle_invalid_token shows an error message and quits the app right after
void handle_invalid_token(j0_maker* m) {
    if (m->current == EOF) {
        fail(m, 1, "unexpected end of input");
    }

    char msg[64];
    const unsigned char c = (unsigned char)m->current;
    snprintf(msg, sizeof(msg), "%c (%d): invalid token", c, c);
    fail(m, 1, msg);
}

// dispatch ties leading bytes/chars in tokens to the funcs which handle them:
// bytes which can't start any token are left as null function-pointers
static void (*const dispatch[256])(j0_maker*) = {
    ['0'] = handle_number,
    ['1'] = handle_number,
    ['2'] = handle_number,
    ['3'] = handle_number,
    ['4'] = handle_number,
    ['5'] = handle_number,
    ['6'] = handle_number,
    ['7'] = handle_number,
    ['8'] = handle_number,
    ['9'] = handle_number,
    ['n'] = handle_null,
    ['t'] = handle_true,
    ['f'] = handle_false,
    ['N'] = handle_capital_none,
    ['T'] = handle_capital_true,
    ['F'] = handle_capital_false,
    ['.'] = handle_dot,
    ['+'] = handle_plus_number,
    ['-'] = handle_minus_number,
    ['"'] = handle_string,
    ['\''] = handle_string,
    ['['] = handle_array,
    ['{'] = handle_object,
};

// handle_token handles the value starting at the current byte, which can't
// be used to index the dispatch table when it's EOF
void handle_token(j0_maker* m) {
    const int lead = m->current;
    if (lead < 0 || lead > 255 || dispatch[lead] == NULL) {
        handle_invalid_token(m);
        return;
    }
    dispatch[lead](m);
}

// handle_input converts the whole input given into a line on stdout, or
// into several lines when in JSON Lines mode and the top-level value is
// an array; any problem quits the app via func fail
void handle_input(FILE* src, bool jsonl) {
    unsigned char ibuf[32 * 1024];
    unsigned char obuf[8 * 1024];

    j0_maker m;
    m.ibuf = ibuf;
    m.icap = sizeof(ibuf);
    m.obuf = obuf;
    m.ocap = sizeof(obuf);
    restart_state(&m, stdout, src);

    // ignore leading whitespace/comment bytes, if present
    seek_token(&m);

    if (m.current == EOF) {
        fail(&m, 1, "empty input isn't valid JSON");
    }

    if (jsonl && m.current == '[') {
        handle_array_jsonl(&m);
    } else {
        handle_token(&m);
        write_byte(&m, '\n');
    }
    flush(&m);

    // ignore trailing whitespace/comment bytes, if present
    seek_token(&m);

    // ignore trailing semicolon, if present
    if (m.current == ';') {
        advance(&m);
        // ignore trailing whitespace/comment bytes, if present
        seek_token(&m);
    }

    if (!feof(src) || m.current != EOF) {
        fail(&m, 1, "unexpected trailing JSON data");
    }
}

bool is_help_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--help") == 0
    );
}

bool is_jsonl_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-jl") == 0 ||
        strcmp(s, "--jl") == 0 ||
        strcmp(s, "-jsonl") == 0 ||
        strcmp(s, "--jsonl") == 0
    );
}

// run returns the error code
int run(int argc, char** argv) {
    bool jsonl = false;
    if (argc > 1 && is_jsonl_option(argv[1])) {
        jsonl = true;
        argc--;
        argv++;
    }

    if (argc > 2) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, ERROR_LINE("%s"), msg);
        return 1;
    }

    // use stdin when not given a filepath, or is `-`
    if (argc < 2 || argv[1][0] == 0 || strcmp(argv[1], "-") == 0) {
        handle_input(stdin, jsonl);
        return 0;
    }

    const char* path = argv[1];
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return 1;
    }

    handle_input(f, jsonl);
    fclose(f);

    return 0;
}

// defining J0_NO_MAIN leaves out the command-line entry point, so the
// converter funcs can be compiled into other programs, such as test harnesses
#ifndef J0_NO_MAIN
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    if (argc > 1 && is_help_option(argv[1])) {
        printf("%s", info);
        return 0;
    }

    // enable full/block-buffering for standard output
    // setvbuf(stdout, NULL, _IOFBF, 0);

    return run(argc, argv) == 0 ? 0 : 1;
}
#endif