/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./j0 ./j0.c
*/

// NOTE(review): the header names were missing from the reviewed copy of this
// file; the list below covers every library call used in this file, but
// should be confirmed against the original
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif

// info is the message shown when this app is given any of its help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
" - ignores both rest-of-line and multi-line comments\n"
" - ignores extra/trailing commas in arrays and objects\n"
" - turns single-quoted strings/keys into double-quoted strings\n"
" - double-quotes unquoted object keys\n"
" - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
" -jsonl emit JSON Lines, when top-level value is an array\n"
"";

// j0_maker holds all the state needed to turn one input into JSON output:
// a chunked input reader with 1 byte of lookahead, the output stream, and
// the line/position counters used in error messages
typedef struct j0_maker {
    FILE* in;
    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    FILE* out;

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    int current; // the byte being handled, or EOF
    int next;    // the byte right after the current one, or EOF
} j0_maker;

// advance_reader_pos helps func read_byte do its job: it moves past the byte
// given, keeping the line/position counters up to date
void advance_reader_pos(j0_maker* r, unsigned char b) {
    r->ipos++;
    if (b == '\n') {
        r->line++;
        r->pos = 1;
    } else {
        r->pos++;
    }
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
int read_byte(j0_maker* r) {
    if (r->ipos < r->ilen) {
        // inside current chunk
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // need to read the next block
    r->ipos = 0;
    r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
    if (r->ilen > 0) {
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // reached the end of data
    return EOF;
}

// advance is used in most of the code, instead of calling read_byte directly:
// it shifts the lookahead byte into the current one, and reads a new lookahead
void advance(j0_maker* r) {
    r->current = r->next;
    r->next = read_byte(r);
}

void fail(j0_maker* s, int code, const char* msg);

// skip_line ignores bytes up to and including the next line-feed, or until
// the input ends
void skip_line(j0_maker* r) {
    while (true) {
        advance(r);
        if (r->current == EOF) {
            break;
        }
        if (r->current == '\n') {
            advance(r);
            break;
        }
    }
}

// skip_multiline_comment ignores bytes up to and including the next `*/`;
// a comment left unclosed just runs until the end of the input
void skip_multiline_comment(j0_maker* r) {
    unsigned char prev = 0;

    while (true) {
        advance(r);
        if (r->current == EOF) {
            break;
        }
        if (prev == '*' && r->current == '/') {
            advance(r);
            break;
        }
        prev = (unsigned char)r->current;
    }
}

// skip_comment ignores a `//` or a `/* */` comment, starting from its leading
// slash, failing when the slash isn't followed by either a `/` or a `*`
void skip_comment(j0_maker* r) {
    if (r->current != '/') {
        fail(r, 1, "expected a slash to start comments");
    }
    advance(r);

    if (r->current == '/') {
        skip_line(r);
        return;
    }
    if (r->current == '*') {
        skip_multiline_comment(r);
        return;
    }

    fail(r, 1, "expected `//` or `/*` to start comments");
}

// seek_token skips any run of whitespace/control bytes and comments, leaving
// the current byte at the start of the next token, or at EOF
void seek_token(j0_maker* r) {
    while (true) {
        if (r->current != EOF && r->current <= ' ') {
            advance(r);
            continue;
        }
        if (r->current == '/') {
            skip_comment(r);
            continue;
        }
        break;
    }
}

// starts_with_bom checks if the bytes given start with a UTF-8 byte-order mark
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// restart_state gets the state ready to handle a new input: it resets all
// counters, loads the first 2 bytes, and skips a leading UTF-8 BOM, if any;
// the input buffer and its capacity must already be set
void restart_state(j0_maker* s, FILE* w, FILE* r) {
    s->in = r;
    s->ilen = 0;
    s->ipos = 0;
    s->out = w;
    s->line = 1;
    s->pos = 1;
    s->current = EOF;
    s->next = EOF;

    s->current = read_byte(s);
    if (s->current == EOF) {
        return;
    }
    s->next = read_byte(s);

    // skip leading UTF-8 BOM (byte-order mark), if present
    if (starts_with_bom(s->ibuf, s->ilen)) {
        // a UTF-8 BOM has 3 bytes
        for (size_t i = 0; i < 3 && s->current != EOF; i++) {
            advance(s);
        }
    }
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(j0_maker* w, const unsigned char* src, size_t len) {
    if (len > 0 && fwrite(src, len, 1, w->out) < 1) {
        // NOTE(review): presumably meant to quit quietly when the output
        // side is gone; confirm feof is the intended check on an output
        if (feof(w->out)) {
            exit(0);
        }
        fail(w, 1, "failed to write more output");
    }
}

// write_byte emits a single byte to the output; it's `static inline` because
// a plain `inline` func has no external definition in C99/C11, which makes
// unoptimized builds fail to link
static inline void write_byte(j0_maker* w, unsigned char b) {
    putc(b, w->out);
}

// debug is available to diagnose any bug found: it shows the printf-style
// message given on stderr, along with the current input position, then quits
// the app with exit code 10
void debug(j0_maker* s, const char* fmt, ...) {
    va_list args;
    va_start(args, fmt);

    if (s->in != stdin) {
        fclose(s->in);
    }
    write_byte(s, '\n');

    const unsigned long line = s->line;
    const unsigned long pos = s->pos;
    fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
    // a va_list can only be given to the v-variants of the printf funcs
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\x1b[0m\n");

    va_end(args);
    exit(10);
}

// fail quits this app with the exit code given, after showing the error
// message given on stderr, along with the current input position; it never
// returns to its caller
void fail(j0_maker* s, int code, const char* msg) {
    if (s->in != stdin) {
        fclose(s->in);
    }
    write_byte(s, '\n');

    const unsigned long line = s->line;
    const unsigned long pos = s->pos;
    fprintf(stderr, "\x1b[31mline %lu, pos %lu: %s\x1b[0m\n", line, pos, msg);
    exit(code);
}

// demand_keyword checks if the input matches all the bytes in the string
// given, starting from the current byte: input bytes are consumed as long
// as they match
bool demand_keyword(j0_maker* s, const char* rest) {
    for (; rest[0] != 0; rest++) {
        if (s->current == EOF || s->current != rest[0]) {
            return false;
        }
        advance(s);
    }
    return rest[0] == 0;
}

// handle_null turns keyword `null` into itself
void handle_null(j0_maker* s) {
    if (!demand_keyword(s, "null")) {
        fail(s, 1, "expected `null` keyword");
    }
    write_bytes(s, (const unsigned char*)"null", 4);
}

// handle_true turns keyword `true` into itself
void handle_true(j0_maker* s) {
    if (!demand_keyword(s, "true")) {
        fail(s, 1, "expected `true` keyword");
    }
    write_bytes(s, (const unsigned char*)"true", 4);
}

// handle_false turns keyword `false` into itself
void handle_false(j0_maker* s) {
    if (!demand_keyword(s, "false")) {
        fail(s, 1, "expected `false` keyword");
    }
    write_bytes(s, (const unsigned char*)"false", 5);
}

// handle_capital_none turns keyword `None` into `null`
void handle_capital_none(j0_maker* s) {
    if (!demand_keyword(s, "None")) {
        fail(s, 1, "expected `None` keyword");
    }
    write_bytes(s, (const unsigned char*)"null", 4);
}

// handle_capital_true turns keyword `True` into `true`
void handle_capital_true(j0_maker* s) {
    if (!demand_keyword(s, "True")) {
        fail(s, 1, "expected `True` keyword");
    }
    write_bytes(s, (const unsigned char*)"true", 4);
}

// handle_capital_false turns keyword `False` into `false`
void handle_capital_false(j0_maker* s) {
    if (!demand_keyword(s, "False")) {
        fail(s, 1, "expected `False` keyword");
    }
    write_bytes(s, (const unsigned char*)"false", 5);
}

// handle_digits copies a run of 1 or more decimal digits, failing when the
// current byte isn't a digit
void handle_digits(j0_maker* s) {
    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits");
    }

    while (isdigit(s->current)) {
        write_byte(s, s->current);
        advance(s);
    }
}

// handle_exponent copies the optional exponent part of a number, dropping
// any redundant `+` sign in it; it does nothing when there's no exponent
static void handle_exponent(j0_maker* s) {
    if (s->current != 'e' && s->current != 'E') {
        return;
    }

    write_byte(s, s->current);
    advance(s);

    if (s->current == '+') {
        advance(s);
    } else if (s->current == '-') {
        write_byte(s, '-');
        advance(s);
    }
    handle_digits(s);
}

// handle_number copies an unsigned number, which must start with a digit:
// a trailing decimal dot gets a 0 after it, and an exponent can follow
// either the integer digits or the decimals
void handle_number(j0_maker* s) {
    handle_digits(s);

    if (s->current == '.') {
        write_byte(s, '.');
        advance(s);
        if (isdigit(s->current)) {
            handle_digits(s);
        } else {
            write_byte(s, '0');
        }
    }

    handle_exponent(s);
}

// handle_dot handles numbers starting with a decimal dot, emitting the
// leading 0 which JSON requires before the dot
void handle_dot(j0_maker* s) {
    write_byte(s, '0');
    write_byte(s, '.');
    advance(s);

    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits after decimal dot");
    }
    handle_digits(s);
    handle_exponent(s);
}

// handle_plus_number handles numbers with a leading `+`, which is dropped
void handle_plus_number(j0_maker* s) {
    advance(s);
    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// handle_minus_number handles numbers with a leading `-`, which is kept
void handle_minus_number(j0_maker* s) {
    write_byte(s, '-');
    advance(s);
    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// write_string_byte emits a byte from inside a string, escaping it when JSON
// doesn't allow it as is: that's the case for double-quotes, which can show
// up unescaped in single-quoted input strings, and for control bytes
static void write_string_byte(j0_maker* s, int c) {
    static const char hex[] = "0123456789abcdef";

    if (c == '"') {
        write_bytes(s, (const unsigned char*)"\\\"", 2);
        return;
    }

    if (c >= 0 && c < 0x20) {
        write_bytes(s, (const unsigned char*)"\\u00", 4);
        write_byte(s, hex[(c >> 4) & 0x0f]);
        write_byte(s, hex[c & 0x0f]);
        return;
    }

    write_byte(s, c);
}

// copy_hex_digits reads and copies the number of hexadecimal digits given,
// failing if the input ends, or if any of the bytes isn't a hex digit
static void copy_hex_digits(j0_maker* s, size_t count) {
    for (size_t i = 0; i < count; i++) {
        advance(s);
        if (s->current == EOF) {
            fail(s, 1, "end of input before end of string");
        }
        if (!isxdigit(s->current)) {
            fail(s, 1, "invalid hexadecimal digit in string");
        }
        write_byte(s, s->current);
    }
}

// handle_string_escape handles the byte right after a backslash in a string,
// which is the one given; it may read more bytes, when handling \u and \x
void handle_string_escape(j0_maker* s, int c) {
    switch (c) {
        case '"':
        case '\\':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
            write_byte(s, '\\');
            write_byte(s, c);
            break;

        case 'u':
            write_byte(s, '\\');
            write_byte(s, 'u');
            copy_hex_digits(s, 4);
            break;

        case 'x':
            // JSON has no \x escapes, so these turn into \u00 ones
            write_byte(s, '\\');
            write_byte(s, 'u');
            write_byte(s, '0');
            write_byte(s, '0');
            copy_hex_digits(s, 2);
            break;

        case '\'':
            // single quotes need no escaping in double-quoted strings
            write_byte(s, '\'');
            break;

        default:
            // any other escaped byte loses its backslash
            write_string_byte(s, c);
            break;
    }
}

// handle_string turns a single-quoted or double-quoted string into a
// double-quoted one: the current byte is the opening quote, which also
// decides which quote ends the string
void handle_string(j0_maker* s) {
    const int quote = s->current;
    bool escaped = false;

    write_byte(s, '"');

    while (true) {
        advance(s);

        const int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended before string was close-quoted");
        }

        if (escaped) {
            handle_string_escape(s, c);
            escaped = false;
            continue;
        }

        if (c == '\\') {
            escaped = true;
            continue;
        }

        if (c == quote) {
            write_byte(s, '"');
            advance(s);
            return;
        }

        write_string_byte(s, c);
    }
}

void handle_token(j0_maker* s);

// handle_array handles arrays, starting from their opening `[`: extra commas
// are ignored, and items are emitted with single commas between them
void handle_array(j0_maker* s) {
    size_t items_before = 0;

    write_byte(s, '[');
    advance(s);

    while (true) {
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (s->current == ',') {
            advance(s);
            continue;
        }

        if (s->current == ']') {
            write_byte(s, ']');
            advance(s);
            return;
        }

        if (items_before > 0) {
            write_byte(s, ',');
        }
        handle_token(s);
        items_before++;
    }
}

// handle_array_jsonl is a slight variation of func handle_array: this one is
// used to handle top-level arrays when running in JSON Lines mode, to emit
// line-feeds after each item, instead of commas between them
void handle_array_jsonl(j0_maker* s) {
    size_t items_before = 0;

    advance(s);

    while (true) {
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (s->current == ',') {
            advance(s);
            continue;
        }

        // this also ends the last line, right before the array is closed
        if (items_before > 0) {
            write_byte(s, '\n');
        }

        if (s->current == ']') {
            advance(s);
            return;
        }

        handle_token(s);
        items_before++;
    }
}

// handle_unquoted_key double-quotes an identifier used as an object key:
// the current byte is always part of the key, which goes on for as long
// as letters, digits, and underscores follow
void handle_unquoted_key(j0_maker* s) {
    write_byte(s, '"');

    while (true) {
        int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended with an object key");
        }

        write_byte(s, c);
        advance(s);

        c = s->current;
        if (!isalpha(c) && !isdigit(c) && c != '_') {
            break;
        }
    }

    write_byte(s, '"');
}

// handle_object handles objects, starting from their opening `{`: extra
// commas are ignored, keys can be strings or identifiers, and each key must
// be followed by a `:` and a value
void handle_object(j0_maker* s) {
    size_t items_before = 0;

    write_byte(s, '{');
    advance(s);

    while (true) {
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "unclosed object");
        }

        if (s->current == ',') {
            advance(s);
            continue;
        }

        if (s->current == '}') {
            write_byte(s, '}');
            advance(s);
            return;
        }

        if (s->current == '"' || s->current == '\'') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_string(s);
            items_before++;
        } else if (isalpha(s->current) || s->current == '_') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_unquoted_key(s);
            items_before++;
        } else {
            fail(s, 1, "only strings or identifiers can be object keys");
        }

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after object-key and before value");
        }

        if (s->current != ':') {
            fail(s, 1, "a `:` must follow all object keys");
        }
        write_byte(s, ':');
        advance(s);

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after a `:` following an object-key");
        }
        handle_token(s);
    }
}

// dispatch ties leading bytes/chars in tokens to the funcs which handle them:
// all entries not given explicitly are NULL, which marks invalid tokens
void (*dispatch[256])(j0_maker*) = {
    ['0'] = handle_number,
    ['1'] = handle_number,
    ['2'] = handle_number,
    ['3'] = handle_number,
    ['4'] = handle_number,
    ['5'] = handle_number,
    ['6'] = handle_number,
    ['7'] = handle_number,
    ['8'] = handle_number,
    ['9'] = handle_number,
    ['n'] = handle_null,
    ['t'] = handle_true,
    ['f'] = handle_false,
    ['N'] = handle_capital_none,
    ['T'] = handle_capital_true,
    ['F'] = handle_capital_false,
    ['.'] = handle_dot,
    ['+'] = handle_plus_number,
    ['-'] = handle_minus_number,
    ['"'] = handle_string,
    ['\''] = handle_string,
    ['['] = handle_array,
    ['{'] = handle_object,
};

// handle_token handles the value starting at the current byte, using the
// dispatch table to pick the right func; callers are expected to skip any
// leading whitespace/comments first
void handle_token(j0_maker* s) {
    const int c = s->current;

    // EOF is negative, so it can't be used to index the dispatch table
    if (c < 0 || c > 255) {
        fail(s, 1, "expected a token");
        return;
    }

    void (*fn)(j0_maker*) = dispatch[c];
    if (fn != NULL) {
        fn(s);
        return;
    }

    fprintf(stderr, "%c\n", (unsigned char)c);
    fail(s, 1, "invalid token");
}

// handle_input converts the whole input given into JSON, or JSON Lines when
// asked to and the top-level value is an array; results go to stdout, and
// any error quits the app right away
void handle_input(FILE* src, bool jsonl) {
    unsigned char ibuf[32 * 1024];

    j0_maker state;
    j0_maker* s = &state;
    s->ibuf = ibuf;
    s->icap = sizeof(ibuf);
    restart_state(s, stdout, src);

    // ignore leading whitespace/comment bytes, if present
    seek_token(s);

    if (s->current == EOF) {
        fail(s, 1, "empty input isn't valid JSON");
    }

    if (jsonl && s->current == '[') {
        handle_array_jsonl(s);
    } else {
        handle_token(s);
        write_byte(s, '\n');
    }

    // ignore trailing whitespace/comment bytes, if present
    seek_token(s);

    // ignore trailing semicolon, if present
    if (s->current == ';') {
        advance(s);
        // ignore trailing whitespace/comment bytes, if present
        seek_token(s);
    }

    // checking the stream with feof can't work here, since the stream can
    // reach its end while unhandled bytes are still in the input buffer
    if (s->current != EOF) {
        fail(s, 1, "unexpected trailing JSON data");
    }
}

// is_help_option checks if the string given is any of the help options
bool is_help_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// is_jsonl_option checks if the string given is the JSON Lines option
bool is_jsonl_option(const char* s) {
    return (s[0] == '-' && s[1] != 0) && (
        strcmp(s, "-jsonl") == 0 ||
        strcmp(s, "--jsonl") == 0
    );
}

// run returns the error code: it handles the optional leading jsonl option,
// then converts either the file named, or stdin when no name is given, or
// when the name given is empty or a single dash
int run(int argc, char** argv) {
    bool jsonl = false;
    if (argc > 1 && is_jsonl_option(argv[1])) {
        jsonl = true;
        argc--;
        argv++;
    }

    if (argc > 2) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
        return 1;
    }

    // use stdin when not given a filepath, or is `-`
    if (argc < 2 || argv[1][0] == 0 || strcmp(argv[1], "-") == 0) {
        handle_input(stdin, jsonl);
        return 0;
    }

    const char* path = argv[1];
    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return 1;
    }

    handle_input(f, jsonl);
    fclose(f);
    return 0;
}

// main shows the help message when asked to, or runs the app otherwise,
// exiting with 0 on success and with 1 on failure
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    return run(argc, argv) == 0 ? 0 : 1;
}