/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./json2 ./json2.c
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// ssize_t isn't part of standard C: this header is what declares it
#include <sys/types.h>

#ifdef _WIN32
#include <fcntl.h>
#include <windows.h>
#endif

// each macro is defined exactly once per configuration, since redefining a
// macro with a different replacement isn't allowed without an #undef first
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif

const char* info = ""
"json2 [options...] [file...]\n"
"\n"
"\n"
"JSON-2 converts/fixes JSON/pseudo-JSON input into indented multi-line JSON\n"
"which uses 2 spaces for each indentation level.\n"
"\n"
"Besides formatting JSON, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
" - ignores both rest-of-line and multi-line comments\n"
" - ignores extra/trailing commas in arrays and objects\n"
" - turns single-quoted strings/keys into double-quoted strings\n"
" - double-quotes unquoted object keys\n"
" - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"";

// j2_maker holds all the state needed to turn an input stream into indented
// JSON: a chunked input reader with a 2-byte lookahead (current/next), a
// buffered writer, and the current nesting level
typedef struct j2_maker {
    FILE* in;
    FILE* out;

    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    unsigned char* obuf;
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    ssize_t level; // the current indentation/nesting level

    int current; // the byte being handled, or EOF
    int next;    // the byte right after the current one, or EOF
} j2_maker;

// advance_reader_pos helps func read_byte do its job: it moves past byte b
// in the input buffer, and keeps the line/pos counters used by error messages
static inline void advance_reader_pos(j2_maker* r, unsigned char b) {
    r->ipos++;
    if (b == '\n') {
        r->line++;
        r->pos = 1;
    } else {
        r->pos++;
    }
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
static inline int read_byte(j2_maker* r) {
    if (r->ipos >= r->ilen) {
        // the current chunk is over: try to read the next one
        r->ipos = 0;
        r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
        if (r->ilen == 0) {
            // reached the end of data
            return EOF;
        }
    }

    const unsigned char b = r->ibuf[r->ipos];
    advance_reader_pos(r, b);
    return b;
}

// advance is used in most of the code, instead of calling read_byte directly:
// it shifts the 2-byte lookahead window 1 byte forward
static inline void advance(j2_maker* r) {
    r->current = r->next;
    r->next = read_byte(r);
}

void fail(j2_maker* m, int code, const char* msg);

// skip_line ignores all bytes up to and including the next line-feed, or up
// to the end of the input, whichever comes first
void skip_line(j2_maker* r) {
    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            return;
        }

        if (lead == '\n') {
            advance(r);
            return;
        }
    }
}

// skip_multiline_comment ignores all bytes up to and including the next `*/`
// pair, or up to the end of the input, whichever comes first
void skip_multiline_comment(j2_maker* r) {
    unsigned char prev = 0;

    while (true) {
        advance(r);
        const int lead = r->current;

        if (lead == EOF) {
            return;
        }

        if (prev == '*' && lead == '/') {
            advance(r);
            return;
        }

        prev = (unsigned char)lead;
    }
}

// skip_comment ignores a `#` or `//` rest-of-line comment, or a `/* ... */`
// comment: it expects the current byte to be the one starting the comment,
// and fails (quitting the app) when a slash doesn't start a valid comment
void skip_comment(j2_maker* r) {
    int lead = r->current;

    if (lead == '#') {
        skip_line(r);
        return;
    }

    if (lead != '/') {
        fail(r, 1, "expected a slash to start comments");
    }

    advance(r);
    lead = r->current;

    if (lead == '/') {
        skip_line(r);
        return;
    }

    if (lead == '*') {
        skip_multiline_comment(r);
        return;
    }

    fail(r, 1, "expected `//` or `/*` to start comments");
}

// seek_token skips whitespace/control bytes and comments, stopping either at
// the first byte of the next token, or at the end of the input
static inline void seek_token(j2_maker* r) {
    while (true) {
        const int lead = r->current;

        // EOF is negative, so it must be ruled out explicitly
        if (lead != EOF && lead <= ' ') {
            advance(r);
            continue;
        }

        if (lead == '/' || lead == '#') {
            skip_comment(r);
            continue;
        }

        return;
    }
}

// starts_with_bom checks if the n bytes given start with a UTF-8 BOM
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// restart_state (re)initializes everything except the buffers and their
// capacities, which the caller must set beforehand; it also loads the first
// 2 input bytes into the lookahead window, and skips a leading UTF-8 BOM
void restart_state(j2_maker* m, FILE* w, FILE* r) {
    m->in = r;
    m->ilen = 0;
    m->ipos = 0;

    m->out = w;
    m->opos = 0;

    m->line = 1;
    m->pos = 1;

    // the level is set before the first read, so it's never left
    // uninitialized, even when the input turns out to be empty
    m->level = 0;

    m->current = EOF;
    m->next = EOF;

    m->current = read_byte(m);
    if (m->current == EOF) {
        return;
    }
    m->next = read_byte(m);

    // skip leading UTF-8 BOM (byte-order mark), if present
    if (starts_with_bom(m->ibuf, m->ilen)) {
        // a UTF-8 BOM has 3 bytes
        for (size_t i = 0; i < 3 && m->current != EOF; i++) {
            advance(m);
        }
    }
}

// write_byte appends a byte to the output buffer, emptying the buffer into
// the output stream first, when it's full
void write_byte(j2_maker* m, unsigned char b) {
    if (m->opos < m->ocap) {
        m->obuf[m->opos++] = b;
        return;
    }

    // write errors are remembered by the stream itself: see func ferror
    fwrite(m->obuf, 1, m->ocap, m->out);
    m->obuf[0] = b;
    m->opos = 1;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(j2_maker* m, const unsigned char* src, size_t len) {
    const size_t rem = m->ocap - m->opos;
    if (len < rem) {
        memcpy(m->obuf + m->opos, src, len);
        m->opos += len;
        return;
    }

    // slow path: let func write_byte empty the buffer when needed
    for (size_t i = 0; i < len; i++) {
        write_byte(m, src[i]);
    }
}

// write_spaces emits n spaces; non-positive counts emit nothing
void write_spaces(j2_maker* m, ssize_t n) {
    unsigned char spaces[32];
    memset(spaces, ' ', sizeof(spaces));

    if (n <= 0) {
        return;
    }

    // counting with an unsigned value avoids signed/unsigned comparisons
    size_t left = (size_t)n;
    while (left > sizeof(spaces)) {
        write_bytes(m, spaces, sizeof(spaces));
        left -= sizeof(spaces);
    }
    write_bytes(m, spaces, left);
}

// indent emits the leading spaces for the current nesting level
static inline void indent(j2_maker* m) {
    write_spaces(m, 2 * m->level);
}

// flush empties the output buffer into the output stream, and flushes the
// stream itself right after
void flush(j2_maker* m) {
    if (m->opos > 0) {
        fwrite(m->obuf, 1, m->opos, m->out);
    }
    m->opos = 0;
    fflush(m->out);
}

// https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/

// check_2_byte_rune checks if the bytes given are a well-formed 2-byte rune
static inline bool check_2_byte_rune(int a, int b) {
    return (0xc2 <= a && a <= 0xdf) && (0x80 <= b && b <= 0xbf);
}

// check_3_byte_rune checks if the bytes given are a well-formed 3-byte rune,
// which excludes overlong encodings and UTF-16 surrogates
bool check_3_byte_rune(int a, int b, int c) {
    if (c < 0x80 || c > 0xbf) {
        return false;
    }

    if (a == 0xe0) {
        // lower 2nd bytes would be overlong encodings
        return 0xa0 <= b && b <= 0xbf;
    }
    if (a == 0xed) {
        // higher 2nd bytes would encode UTF-16 surrogates
        return 0x80 <= b && b <= 0x9f;
    }
    if ((0xe1 <= a && a <= 0xec) || a == 0xee || a == 0xef) {
        return 0x80 <= b && b <= 0xbf;
    }
    return false;
}

// check_4_byte_rune checks if the bytes given are a well-formed 4-byte rune,
// which excludes overlong encodings and values beyond U+10FFFF
bool check_4_byte_rune(int a, int b, int c, int d) {
    if (c < 0x80 || c > 0xbf || d < 0x80 || d > 0xbf) {
        return false;
    }

    if (a == 0xf0) {
        // lower 2nd bytes would be overlong encodings
        return 0x90 <= b && b <= 0xbf;
    }
    if (0xf1 <= a && a <= 0xf3) {
        return 0x80 <= b && b <= 0xbf;
    }
    if (a == 0xf4) {
        // higher 2nd bytes would encode values beyond U+10FFFF
        return 0x80 <= b && b <= 0x8f;
    }
    return false;
}

// write_replacement_char is the recommended action to handle invalid bytes:
// it emits U+FFFD encoded as UTF-8
void write_replacement_char(j2_maker* m) {
    write_byte(m, 0xef);
    write_byte(m, 0xbf);
    write_byte(m, 0xbd);
}

// handle_invalid_rune is lenient on purpose: invalid input bytes become a
// replacement char, instead of quitting the app with an error
void handle_invalid_rune(j2_maker* m) {
    write_replacement_char(m);
}

// write_rune is following the table at https://en.wikipedia.org/wiki/UTF-8
// and emits a replacement char for values which can't be validly encoded
void write_rune(j2_maker* m, uint32_t rune) {
    if (rune < 0x80u) {
        write_byte(m, (unsigned char)rune);
        return;
    }

    if (rune < 0x800u) {
        const int a = (int)(0xc0u | (rune >> 6));
        const int b = (int)(0x80u | (rune & 0x3fu));
        if (check_2_byte_rune(a, b)) {
            write_byte(m, (unsigned char)a);
            write_byte(m, (unsigned char)b);
        } else {
            write_replacement_char(m);
        }
        return;
    }

    if (rune < 0x10000u) {
        const int a = (int)(0xe0u | (rune >> 12));
        const int b = (int)(0x80u | ((rune >> 6) & 0x3fu));
        const int c = (int)(0x80u | (rune & 0x3fu));
        if (check_3_byte_rune(a, b, c)) {
            write_byte(m, (unsigned char)a);
            write_byte(m, (unsigned char)b);
            write_byte(m, (unsigned char)c);
        } else {
            write_replacement_char(m);
        }
        return;
    }

    if (rune < 0x200000u) {
        const int a = (int)(0xf0u | (rune >> 18));
        const int b = (int)(0x80u | ((rune >> 12) & 0x3fu));
        const int c = (int)(0x80u | ((rune >> 6) & 0x3fu));
        const int d = (int)(0x80u | (rune & 0x3fu));
        if (check_4_byte_rune(a, b, c, d)) {
            write_byte(m, (unsigned char)a);
            write_byte(m, (unsigned char)b);
            write_byte(m, (unsigned char)c);
            write_byte(m, (unsigned char)d);
        } else {
            write_replacement_char(m);
        }
        return;
    }

    write_replacement_char(m);
}

// is_utf8_continuation checks if a byte can follow the lead byte of a rune;
// the value EOF is never a continuation byte
static inline bool is_utf8_continuation(int b) {
    return 0x80 <= b && b <= 0xbf;
}

// copy_utf8_rune copies the rune starting at the current byte, leaving the
// rune's last byte as the current one; invalid sequences are replaced, and
// only continuation bytes are ever consumed along with the lead byte, so
// bytes which matter to the parser (such as closing quotes) are never lost
void copy_utf8_rune(j2_maker* m) {
    const int lead = m->current;

    if (lead == EOF) {
        return;
    }

    // handle 1-byte runes
    if (lead < 0x80) {
        write_byte(m, (unsigned char)lead);
        return;
    }

    // the lead byte tells how long the whole rune is supposed to be;
    // a length of 0 means the byte can't start a rune
    size_t want = 0;
    if (0xc2 <= lead && lead <= 0xdf) {
        want = 2;
    } else if (0xe0 <= lead && lead <= 0xef) {
        want = 3;
    } else if (0xf0 <= lead && lead <= 0xf4) {
        want = 4;
    }

    int seq[4] = {lead, 0, 0, 0};
    size_t got = 1;
    while (got < want && is_utf8_continuation(m->next)) {
        advance(m);
        seq[got] = m->current;
        got++;
    }

    bool valid = false;
    if (got == want) {
        switch (want) {
        case 2:
            valid = check_2_byte_rune(seq[0], seq[1]);
            break;
        case 3:
            valid = check_3_byte_rune(seq[0], seq[1], seq[2]);
            break;
        case 4:
            valid = check_4_byte_rune(seq[0], seq[1], seq[2], seq[3]);
            break;
        default:
            break;
        }
    }

    if (!valid) {
        handle_invalid_rune(m);
        return;
    }

    for (size_t i = 0; i < want; i++) {
        write_byte(m, (unsigned char)seq[i]);
    }
}

// debug is available to diagnose any bug found: it shows the printf-style
// message given, along with the current input position, then quits the app
void debug(j2_maker* m, const char* fmt, ...) {
    if (m->in != stdin) {
        fclose(m->in);
    }

    // don't lose any output still waiting in the buffer
    write_byte(m, '\n');
    flush(m);

    const unsigned long line = (unsigned long)m->line;
    const unsigned long pos = (unsigned long)m->pos;
    fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);

    // a va_list can only be handed to the v-variants of the printf funcs
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\x1b[0m\n");

    exit(10);
}

// fail quits this app right after showing the error message given
void fail(j2_maker* m, int code, const char* msg) {
    const unsigned long line = (unsigned long)m->line;
    const unsigned long pos = (unsigned long)m->pos;

    // end the output shown so far with a line-feed, so the error message
    // starts on its own line
    write_byte(m, '\n');
    flush(m);
    fprintf(stderr, ERROR_LINE("line %lu, pos %lu: %s"), line, pos, msg);
    exit(code);
}

// demand_keyword checks if the input has all the bytes given, starting from
// the current byte, consuming all bytes which match along the way
bool demand_keyword(j2_maker* m, const char* rest) {
    for (; rest[0] != 0; rest++) {
        // EOF never matches, since it can't be the value of any char
        if (m->current != (unsigned char)rest[0]) {
            return false;
        }
        advance(m);
    }

    return true;
}

void handle_null(j2_maker* m) {
    if (!demand_keyword(m, "null")) {
        fail(m, 1, "expected `null` keyword");
    }
    write_bytes(m, (const unsigned char*)"null", 4);
}

void handle_true(j2_maker* m) {
    if (!demand_keyword(m, "true")) {
        fail(m, 1, "expected `true` keyword");
    }
    write_bytes(m, (const unsigned char*)"true", 4);
}

void handle_false(j2_maker* m) {
    if (!demand_keyword(m, "false")) {
        fail(m, 1, "expected `false` keyword");
    }
    write_bytes(m, (const unsigned char*)"false", 5);
}

// handle_capital_none turns the python-style keyword into a JSON null
void handle_capital_none(j2_maker* m) {
    if (!demand_keyword(m, "None")) {
        fail(m, 1, "expected `None` keyword");
    }
    write_bytes(m, (const unsigned char*)"null", 4);
}

// handle_capital_true turns the python-style keyword into a JSON true
void handle_capital_true(j2_maker* m) {
    if (!demand_keyword(m, "True")) {
        fail(m, 1, "expected `True` keyword");
    }
    write_bytes(m, (const unsigned char*)"true", 4);
}

// handle_capital_false turns the python-style keyword into a JSON false
void handle_capital_false(j2_maker* m) {
    if (!demand_keyword(m, "False")) {
        fail(m, 1, "expected `False` keyword");
    }
    write_bytes(m, (const unsigned char*)"false", 5);
}

// handle_digits copies a run of 1 or more decimal digits, failing when the
// current byte isn't a digit
void handle_digits(j2_maker* m) {
    if (!isdigit(m->current)) {
        fail(m, 1, "expected/missing digits");
    }

    while (isdigit(m->current)) {
        write_byte(m, (unsigned char)m->current);
        advance(m);
    }
}

// handle_exponent copies the optional exponent of a number, dropping the
// redundant plus sign, if present; it does nothing when the current byte
// doesn't start an exponent
static void handle_exponent(j2_maker* m) {
    const int lead = m->current;
    if (lead != 'e' && lead != 'E') {
        return;
    }

    write_byte(m, (unsigned char)lead);
    advance(m);

    if (m->current == '+') {
        advance(m);
    } else if (m->current == '-') {
        write_byte(m, '-');
        advance(m);
    }

    handle_digits(m);
}

// handle_number copies an unsigned number: its integer digits, an optional
// fraction, and an optional exponent, which can also follow a fraction;
// a trailing dot without decimals is fixed by adding a 0 after it
void handle_number(j2_maker* m) {
    handle_digits(m);

    if (m->current == '.') {
        write_byte(m, '.');
        advance(m);

        if (isdigit(m->current)) {
            handle_digits(m);
        } else {
            write_byte(m, '0');
        }
    }

    handle_exponent(m);
}

// handle_dot handles numbers starting with a decimal dot, which aren't valid
// JSON, by putting a 0 before the dot
void handle_dot(j2_maker* m) {
    write_byte(m, '0');
    write_byte(m, '.');
    advance(m);

    if (!isdigit(m->current)) {
        fail(m, 1, "expected/missing digits after decimal dot");
    }
    handle_digits(m);
    handle_exponent(m);
}

// handle_plus_number drops the leading plus sign, which isn't valid JSON
void handle_plus_number(j2_maker* m) {
    advance(m);

    if (m->current == '.') {
        handle_dot(m);
        return;
    }
    handle_number(m);
}

void handle_minus_number(j2_maker* m) {
    write_byte(m, '-');
    advance(m);

    if (m->current == '.') {
        handle_dot(m);
        return;
    }
    handle_number(m);
}

// decode_hex assumes valid hex digits, checked by func is_valid_hex
uint32_t decode_hex(unsigned char hex) {
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    if ('A' <= hex && hex <= 'F') {
        return hex - 'A' + 10;
    }
    if ('a' <= hex && hex <= 'f') {
        return hex - 'a' + 10;
    }
    return 0xffff;
}

static inline bool is_valid_hex(unsigned char b) {
    return false ||
        ('0' <= b && b <= '9') ||
        ('A' <= b && b <= 'F') ||
        ('a' <= b && b <= 'f');
}

// handle_low_char ensures characters whose ASCII codes are lower than spaces
// are properly escaped for strings; only the 2-char escapes JSON allows are
// used, which is why vertical tabs are emitted as \u escapes instead
void handle_low_char(j2_maker* m, int c) {
    const char* hex = "0123456789ABCDEF";

    write_byte(m, '\\');

    switch (c) {
    case '\t':
        write_byte(m, 't');
        break;
    case '\n':
        write_byte(m, 'n');
        break;
    case '\r':
        write_byte(m, 'r');
        break;
    case '\b':
        write_byte(m, 'b');
        break;
    case '\f':
        write_byte(m, 'f');
        break;
    default:
        write_byte(m, 'u');
        write_byte(m, '0');
        write_byte(m, '0');
        write_byte(m, (unsigned char)hex[c / 16]);
        write_byte(m, (unsigned char)hex[c % 16]);
        break;
    }
}

// write_inner_string_hex_quad emits the value of a 4-hex-digit escape, in a
// way which is valid inside a JSON string
void write_inner_string_hex_quad(j2_maker* m, const unsigned char quad[4]) {
    const uint32_t n = 0 +
        (decode_hex(quad[0]) << 12) +
        (decode_hex(quad[1]) << 8) +
        (decode_hex(quad[2]) << 4) +
        (decode_hex(quad[3]) << 0);

    switch (n) {
    case '"':
        write_byte(m, '\\');
        write_byte(m, '"');
        return;
    case '\\':
        write_byte(m, '\\');
        write_byte(m, '\\');
        return;
    default:
        break;
    }

    // UTF-16 surrogates can't be encoded as UTF-8 on their own: keeping them
    // as escapes lets surrogate pairs survive, instead of being replaced
    if (0xd800 <= n && n <= 0xdfff) {
        write_byte(m, '\\');
        write_byte(m, 'u');
        write_bytes(m, quad, 4);
        return;
    }

    if (n >= ' ') {
        write_rune(m, n);
    } else {
        handle_low_char(m, (int)n);
    }
}

// handle_hex_quad handles the 4 hex digits following a `\u` in strings
void handle_hex_quad(j2_maker* m) {
    unsigned char quad[4];

    for (size_t i = 0; i < 4; i++) {
        advance(m);
        const int lead = m->current;
        if (lead == EOF) {
            fail(m, 1, "end of input before end of string");
        }
        if (!is_valid_hex((unsigned char)lead)) {
            fail(m, 1, "invalid hexadecimal digit in string");
        }
        quad[i] = (unsigned char)lead;
    }

    write_inner_string_hex_quad(m, quad);
}

// handle_hex_pair handles the 2 hex digits following a `\x` in strings
void handle_hex_pair(j2_maker* m) {
    unsigned char quad[4] = {'0', '0', '0', '0'};

    advance(m);
    const int a = m->current;
    advance(m);
    const int b = m->current;

    if (a == EOF || b == EOF) {
        fail(m, 1, "end of input before end of string");
    }
    if (!is_valid_hex((unsigned char)a) || !is_valid_hex((unsigned char)b)) {
        fail(m, 1, "invalid hexadecimal digit in string");
    }

    quad[2] = (unsigned char)a;
    quad[3] = (unsigned char)b;
    write_inner_string_hex_quad(m, quad);
}

// handle_string_escape handles the byte right after a backslash in strings:
// param c is that byte, which is also the current byte, and is never EOF
void handle_string_escape(j2_maker* m, int c) {
    switch (c) {
    case '"':
    case '\\':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
        write_byte(m, '\\');
        write_byte(m, (unsigned char)c);
        break;
    case 'u':
        handle_hex_quad(m);
        break;
    case 'x':
        handle_hex_pair(m);
        break;
    case '\'':
        write_byte(m, '\'');
        break;
    default:
        // escapes JSON doesn't have stand for the escaped symbol itself,
        // which must still result in valid string content
        if (c < ' ') {
            handle_low_char(m, c);
        } else {
            copy_utf8_rune(m);
        }
        break;
    }
}

// handle_string turns a single-quoted or double-quoted string into a valid
// double-quoted JSON string; it expects the current byte to be the opening
// quote, and ends right after the closing quote
void handle_string(j2_maker* m) {
    const int quote = m->current;
    bool escaped = false;

    write_byte(m, '"');

    while (true) {
        advance(m);

        const int c = m->current;
        if (c == EOF) {
            fail(m, 1, "input ended before string was close-quoted");
        }

        if (escaped) {
            handle_string_escape(m, c);
            escaped = false;
            continue;
        }

        if (c == '\\') {
            escaped = true;
            continue;
        }

        if (c == quote) {
            write_byte(m, '"');
            advance(m);
            return;
        }

        if (c == '"') {
            // double quotes inside single-quoted strings need escaping
            write_byte(m, '\\');
            write_byte(m, '"');
            continue;
        }

        if (c < ' ') {
            handle_low_char(m, c);
        } else {
            copy_utf8_rune(m);
        }
    }
}

void handle_token(j2_maker* m, ssize_t lead_level); 844 845 void handle_array(j2_maker* m) { 846 m->level++; 847 write_byte(m, '['); 848 advance(m); 849 850 for (size_t i = 0; true; i++) { 851 seek_token(m); 852 const int lead = m->current; 853 854 if (lead == EOF) { 855 fail(m, 1, "unclosed array"); 856 } 857 858 if (lead == ',') { 859 advance(m); 860 continue; 861 } 862 863 if (lead == ']') { 864 m->level--; 865 if (i > 0) { 866 write_byte(m, '\n'); 867 indent(m); 868 } 869 write_byte(m, ']'); 870 advance(m); 871 return; 872 } 873 874 if (i > 0) { 875 write_byte(m, ','); 876 } 877 write_byte(m, '\n'); 878 if (feof(m->out)) { 879 return; 880 } 881 handle_token(m, m->level); 882 } 883 } 884 885 void handle_unquoted_key(j2_maker* m) { 886 write_byte(m, '"'); 887 888 while (true) { 889 int c = m->current; 890 if (c == EOF) { 891 fail(m, 1, "input ended with an object key"); 892 } 893 894 write_byte(m, c); 895 advance(m); 896 897 c = m->current; 898 if (!isalpha(c) && !isdigit(c) && c != '_') { 899 break; 900 } 901 } 902 903 write_byte(m, '"'); 904 } 905 906 void handle_object(j2_maker* m) { 907 m->level++; 908 write_byte(m, '{'); 909 advance(m); 910 911 for (size_t i = 0; true; i++) { 912 seek_token(m); 913 int lead = m->current; 914 915 if (lead == EOF) { 916 fail(m, 1, "unclosed object"); 917 } 918 919 if (lead == ',') { 920 advance(m); 921 continue; 922 } 923 924 if (lead == '}') { 925 m->level--; 926 if (i > 0) { 927 write_byte(m, '\n'); 928 indent(m); 929 } 930 write_byte(m, '}'); 931 advance(m); 932 return; 933 } 934 935 if (feof(m->out)) { 936 return; 937 } 938 939 if (lead == '"' || lead == '\'') { 940 if (i > 0) { 941 write_byte(m, ','); 942 } 943 write_byte(m, '\n'); 944 indent(m); 945 handle_string(m); 946 } else if (isalpha(lead) || lead == '_') { 947 if (i > 0) { 948 write_byte(m, ','); 949 } 950 write_byte(m, '\n'); 951 indent(m); 952 handle_unquoted_key(m); 953 } else { 954 fail(m, 1, "only strings or identifiers can be object keys"); 955 } 956 957 
seek_token(m); 958 lead = m->current; 959 960 if (lead == EOF) { 961 fail(m, 1, "input ended after object-key and before value"); 962 } 963 964 if (lead != ':') { 965 fail(m, 1, "a `:` must follow all object keys"); 966 } 967 968 write_byte(m, ':'); 969 write_byte(m, ' '); 970 advance(m); 971 972 seek_token(m); 973 if (m->current == EOF) { 974 fail(m, 1, "input ended after a `:` following an object-key"); 975 } 976 977 handle_token(m, 0); 978 } 979 } 980 981 // dispatch ties leading bytes/chars in tokens to the funcs which handle them 982 void (*dispatch[256])() = { 983 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 984 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 985 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 986 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 987 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 988 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 989 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 990 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 991 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 992 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 993 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 994 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 995 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 996 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 997 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 998 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 999 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1000 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1001 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, 1002 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1003 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1004 NULL, NULL, NULL, NULL, 1005 }; 1006 1007 void handle_token(j2_maker* m, ssize_t lead_level) { 1008 write_spaces(m, 2 * lead_level); 1009 dispatch[m->current](m); 1010 } 1011 1012 // handle_invalid_token shows an error message and quits the app right after 1013 void handle_invalid_token(j2_maker* m) { 1014 char msg[64]; 1015 unsigned char c = (unsigned char)m->current; 1016 sprintf(msg, "%c (%d): invalid token", c, c); 1017 fail(m, 1, msg); 1018 } 1019 1020 void handle_input(FILE* src) { 1021 unsigned char ibuf[IBUF_SIZE]; 1022 unsigned char obuf[OBUF_SIZE]; 1023 1024 j2_maker m; 1025 m.ibuf = ibuf; 1026 m.icap = sizeof(ibuf); 1027 m.obuf = obuf; 1028 m.ocap = sizeof(obuf); 1029 restart_state(&m, stdout, src); 1030 1031 // ignore leading whitespace/comment bytes, if present 1032 seek_token(&m); 1033 1034 if (m.current == EOF) { 1035 fail(&m, 1, "empty input isn't valid JSON"); 1036 } 1037 1038 handle_token(&m, 0); 1039 write_byte(&m, '\n'); 1040 flush(&m); 1041 1042 // ignore trailing whitespace/comment bytes, if present 1043 seek_token(&m); 1044 1045 // ignore trailing semicolon, if present 1046 if (m.current == ';') { 1047 advance(&m); 1048 // ignore trailing whitespace/comment bytes, if present 1049 seek_token(&m); 1050 } 1051 1052 if (!feof(src) || m.current != EOF) { 1053 fail(&m, 1, "unexpected trailing JSON data"); 1054 } 1055 } 1056 1057 bool is_help_option(const char* s) { 1058 return (s[0] == '-' && s[1] != 0) && ( 1059 strcmp(s, "-h") == 0 || 1060 strcmp(s, "--h") == 0 || 1061 strcmp(s, "-help") == 0 || 1062 strcmp(s, "--help") == 0 1063 ); 1064 } 1065 1066 // run returns the error code 1067 int run(int nargs, char** args) { 1068 if (nargs > 0 && strcmp(args[0], "--") == 0) { 1069 nargs--; 1070 args++; 1071 } 1072 1073 if (nargs > 1) { 1074 const char* msg = "can't use more 
than 1 named input"; 1075 fprintf(stderr, ERROR_LINE("%s"), msg); 1076 return 1; 1077 } 1078 1079 // use stdin when not given a filepath 1080 if (nargs == 0 || strcmp(args[0], "") == 0 || strcmp(args[0], "-") == 0) { 1081 handle_input(stdin); 1082 return 0; 1083 } 1084 1085 const char* path = args[0]; 1086 FILE* f = fopen(path, "rb"); 1087 if (f == NULL) { 1088 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 1089 return 1; 1090 } 1091 1092 handle_input(f); 1093 fclose(f); 1094 1095 return 0; 1096 } 1097 1098 int main(int argc, char** argv) { 1099 #ifdef _WIN32 1100 setmode(fileno(stdin), O_BINARY); 1101 // ensure output lines end in LF instead of CRLF on windows 1102 setmode(fileno(stdout), O_BINARY); 1103 setmode(fileno(stderr), O_BINARY); 1104 #endif 1105 1106 if (argc > 1 && is_help_option(argv[1])) { 1107 printf("%s", info); 1108 return 0; 1109 } 1110 1111 // the dispatch table starts as all null function-pointers 1112 for (size_t i = 0; i < sizeof(dispatch) / sizeof(dispatch[0]); i++) { 1113 dispatch[i] = handle_invalid_token; 1114 } 1115 1116 for (size_t i = '0'; i <= '9'; i++) { 1117 dispatch[i] = handle_number; 1118 } 1119 1120 dispatch['n'] = handle_null; 1121 dispatch['t'] = handle_true; 1122 dispatch['f'] = handle_false; 1123 dispatch['N'] = handle_capital_none; 1124 dispatch['T'] = handle_capital_true; 1125 dispatch['F'] = handle_capital_false; 1126 dispatch['.'] = handle_dot; 1127 dispatch['+'] = handle_plus_number; 1128 dispatch['-'] = handle_minus_number; 1129 dispatch['"'] = handle_string; 1130 dispatch['\''] = handle_string; 1131 dispatch['['] = handle_array; 1132 dispatch['{'] = handle_object; 1133 1134 // enable full/block-buffering for standard output 1135 setvbuf(stdout, NULL, _IOFBF, 0); 1136 1137 return run(argc - 1, argv + 1) == 0 ? 0 : 1; 1138 }