/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
    j0 [options...] [file...]

Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

The only option available can either start with a single or a double-dash

    -h -help    show this help message
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./j0 ./j0.c
*/

#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif

// info is the message shown when this app is given any of its help options
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
" - ignores both rest-of-line and multi-line comments\n"
" - ignores extra/trailing commas in arrays and objects\n"
" - turns single-quoted strings/keys into double-quoted strings\n"
" - double-quotes unquoted object keys\n"
" - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"The only option available can either start with a single or a double-dash\n"
"\n"
" -h -help show this help message\n"
"";

// j0_maker bundles the buffered reader, the buffered writer, and the 2-byte
// lookahead (current/next) the converter needs; the input and output buffers
// are provided by whoever sets the struct up
typedef struct j0_maker {
    FILE* in;
    unsigned char* ibuf;
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    FILE* out;
    unsigned char* obuf;
    size_t olen; // how many bytes are being used in the output buffer
    size_t ocap; // the output buffer's capacity
    size_t opos; // the current position in the output buffer

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    int current; // the byte being handled, or EOF
    int next;    // the byte right after the current one, or EOF
} j0_maker;

// advance_reader_pos moves past a byte just taken from the input buffer,
// keeping the line/position counters used for error messages up to date
void advance_reader_pos(j0_maker* r, unsigned char b) {
    r->ipos++;
    if (b == '\n') {
        r->line++;
        r->pos = 1;
    } else {
        r->pos++;
    }
}

// read_byte does as it says: check its return for the value EOF, before
// using it as the next byte
int read_byte(j0_maker* r) {
    if (r->ipos < r->ilen) {
        // inside current chunk
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // once the stream has ended/failed, don't try reading from it again:
    // the lookahead keeps asking for bytes after the end of data
    if (feof(r->in) || ferror(r->in)) {
        return EOF;
    }

    // need to read the next block
    r->ipos = 0;
    r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
    if (r->ilen > 0) {
        const unsigned char b = r->ibuf[r->ipos];
        advance_reader_pos(r, b);
        return b;
    }

    // reached the end of data
    return EOF;
}

// advance shifts the lookahead by 1 byte
void advance(j0_maker* r) {
    r->current = r->next;
    r->next = read_byte(r);
}

void fail(j0_maker* s, size_t code, const char* fmt, ...);

// skip_line ignores input until right after the next line-feed, or until
// the input ends
void skip_line(j0_maker* r) {
    while (true) {
        advance(r);
        if (r->current == EOF) {
            break;
        }
        if (r->current == '\n') {
            advance(r);
            break;
        }
    }
}

// skip_multiline_comment ignores input until right after the next `*/`, or
// until the input ends; it expects the current byte to be the `*` which
// opened the comment
void skip_multiline_comment(j0_maker* r) {
    // starting from 0 prevents the opening `*` from also closing the comment
    unsigned char prev = 0;

    while (true) {
        advance(r);
        if (r->current == EOF) {
            break;
        }
        if (prev == '*' && r->current == '/') {
            advance(r);
            break;
        }
        prev = (unsigned char)r->current;
    }
}

// skip_comment ignores either kind of comment, failing when the current
// slash doesn't start one
void skip_comment(j0_maker* r) {
    if (r->current != '/') {
        fail(r, 1, "expected a slash to start comments");
    }
    advance(r);

    if (r->current == '/') {
        skip_line(r);
        return;
    }
    if (r->current == '*') {
        skip_multiline_comment(r);
        return;
    }

    fail(r, 1, "expected `//` or `/*` to start comments");
}

// seek_token skips whitespace/control bytes and comments, stopping at the
// first byte of the next token, or at the end of the input
void seek_token(j0_maker* r) {
    while (true) {
        if (r->current != EOF && r->current <= 32) {
            advance(r);
            continue;
        }
        if (r->current == '/') {
            skip_comment(r);
            continue;
        }
        break;
    }
}

// starts_with_bom checks if the bytes given start with a UTF-8 BOM
bool starts_with_bom(const unsigned char* b, const size_t n) {
    return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
}

// restart_state sets up the reader/writer given for a new input/output pair,
// loads the 2-byte lookahead, and skips a leading UTF-8 BOM; the buffers and
// their capacities must have been set beforehand
void restart_state(j0_maker* s, FILE* w, FILE* r) {
    s->in = r;
    s->ilen = 0;
    s->ipos = 0;
    s->out = w;
    s->olen = 0;
    s->opos = 0;
    s->line = 1;
    s->pos = 1;
    s->current = EOF;
    s->next = EOF;

    s->current = read_byte(s);
    if (s->current == EOF) {
        return;
    }
    s->next = read_byte(s);

    // skip leading UTF-8 BOM (byte-order mark), if present
    if (starts_with_bom(s->ibuf, s->ilen)) {
        // a UTF-8 BOM has 3 bytes
        for (size_t i = 0; i < 3 && s->current != EOF; i++) {
            advance(s);
        }
    }
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; when the output can't take any more bytes
// (a closed pipe, for example) the app just quits successfully
void flush(j0_maker* w) {
    if (w->olen > 0 && fwrite(w->obuf, w->olen, 1, w->out) < 1) {
        exit(0);
    }
    w->olen = 0;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(j0_maker* w, const unsigned char* src, size_t len) {
    if (w->olen + len < w->ocap) {
        // all bytes fit into buffer
        memcpy(w->obuf + w->olen, src, len);
        w->olen += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    for (; len >= w->ocap; src += w->ocap, len -= w->ocap) {
        if (fwrite(src, w->ocap, 1, w->out) < 1) {
            // same policy as func flush, when the output is no longer usable
            exit(0);
        }
    }

    // now all, if any, remaining bytes will fit into the buffer, which is
    // empty at this point
    memcpy(w->obuf, src, len);
    w->olen = len;
}

// write_byte does as it says
void write_byte(j0_maker* w, unsigned char b) {
    if (w->olen >= w->ocap) {
        flush(w);
    }
    w->obuf[w->olen] = b;
    w->olen++;
}

// show_message ends the output line, then shows a styled message on stderr,
// which starts with the current line/position from the input; the format
// arguments must come as a va_list, hence the use of vfprintf
static void show_message(j0_maker* s, const char* style, const char* fmt,
    va_list args) {
    if (s->in != stdin) {
        fclose(s->in);
    }

    write_byte(s, '\n');
    flush(s);
    fflush(s->out);

    // casting avoids mismatches between size_t and the format verbs used
    const unsigned long line = (unsigned long)s->line;
    const unsigned long pos = (unsigned long)s->pos;
    fprintf(stderr, "%sline %lu, pos %lu: ", style, line, pos);
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\x1b[0m\n");
}

// debug shows a printf-style message on stderr, then quits the app with
// exit code 10
void debug(j0_maker* s, const char* fmt, ...) {
    va_list args;
    va_start(args, fmt);
    flush(s);
    fflush(s->out);
    show_message(s, "\x1b[46m\x1b[37m", fmt, args);
    va_end(args);
    exit(10);
}

// fail shows a printf-style error message on stderr, then quits the app
// using the exit code given
void fail(j0_maker* s, size_t code, const char* fmt, ...) {
    va_list args;
    va_start(args, fmt);
    show_message(s, "\x1b[31m", fmt, args);
    va_end(args);
    exit((int)code);
}

// demand_keyword checks if the input has all the bytes given from the current
// one, advancing past all the bytes which match
bool demand_keyword(j0_maker* s, const char* rest) {
    for (; rest[0] != 0; rest++) {
        // an EOF never matches, since it's not a valid byte value
        if (s->current != (unsigned char)rest[0]) {
            return false;
        }
        advance(s);
    }
    return true;
}

// handle_null expects/emits keyword `null`
void handle_null(j0_maker* s) {
    if (!demand_keyword(s, "null")) {
        fail(s, 1, "expected null keyword");
    }
    write_bytes(s, (const unsigned char*)"null", 4);
}

// handle_true expects/emits keyword `true`
void handle_true(j0_maker* s) {
    if (!demand_keyword(s, "true")) {
        fail(s, 1, "expected `true` keyword");
    }
    write_bytes(s, (const unsigned char*)"true", 4);
}

// handle_false expects/emits keyword `false`
void handle_false(j0_maker* s) {
    if (!demand_keyword(s, "false")) {
        fail(s, 1, "expected `false` keyword");
    }
    write_bytes(s, (const unsigned char*)"false", 5);
}

// handle_digits copies a run of 1 or more decimal digits, failing when
// there are none
void handle_digits(j0_maker* s) {
    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits");
    }

    while (isdigit(s->current)) {
        write_byte(s, (unsigned char)s->current);
        advance(s);
    }
}

// handle_exponent copies an optional exponent, such as `e10`, `E-3`, or
// `e+5`, dropping the redundant plus sign; it does nothing when the current
// byte doesn't start an exponent
static void handle_exponent(j0_maker* s) {
    if (s->current != 'e' && s->current != 'E') {
        return;
    }

    write_byte(s, (unsigned char)s->current);
    advance(s);

    if (s->current == '+') {
        advance(s);
    } else if (s->current == '-') {
        write_byte(s, '-');
        advance(s);
    }

    handle_digits(s);
}

// handle_number copies an unsigned number which starts with a digit: a
// trailing decimal dot with no digits after it gets a 0 decimal, so the
// result is valid JSON
void handle_number(j0_maker* s) {
    handle_digits(s);

    if (s->current == '.') {
        write_byte(s, '.');
        advance(s);
        if (isdigit(s->current)) {
            handle_digits(s);
        } else {
            write_byte(s, '0');
        }
    }

    handle_exponent(s);
}

// handle_dot handles numbers starting with a decimal dot, emitting the
// leading 0 valid JSON requires
void handle_dot(j0_maker* s) {
    write_byte(s, '0');
    write_byte(s, '.');
    advance(s);

    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits after decimal dot");
    }
    handle_digits(s);
    handle_exponent(s);
}

// handle_plus_number handles numbers starting with a plus sign, which is
// ignored, since JSON doesn't allow it
void handle_plus_number(j0_maker* s) {
    advance(s);
    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// handle_minus_number handles numbers starting with a minus sign
void handle_minus_number(j0_maker* s) {
    write_byte(s, '-');
    advance(s);
    if (s->current == '.') {
        handle_dot(s);
        return;
    }
    handle_number(s);
}

// copy_hex_digits reads and emits exactly the number of hexadecimal digits
// given, which come right after the current byte
static void copy_hex_digits(j0_maker* s, size_t count) {
    for (size_t i = 0; i < count; i++) {
        advance(s);
        if (s->current == EOF) {
            fail(s, 1, "end of input before end of string");
        }
        if (!isxdigit(s->current)) {
            fail(s, 1, "invalid hexadecimal digit in string");
        }
        write_byte(s, (unsigned char)s->current);
    }
}

// handle_string_escape handles the byte right after a backslash in a string;
// the byte given is the current one when this func is called
void handle_string_escape(j0_maker* s, int c) {
    switch (c) {
    case '"':
    case '\\':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
        write_byte(s, '\\');
        write_byte(s, (unsigned char)c);
        break;

    case 'u':
        write_byte(s, '\\');
        write_byte(s, 'u');
        copy_hex_digits(s, 4);
        break;

    case 'x':
        // turn 2-hex-digit escapes into 4-hex-digit ones
        write_byte(s, '\\');
        write_byte(s, 'u');
        write_byte(s, '0');
        write_byte(s, '0');
        copy_hex_digits(s, 2);
        break;

    case '\'':
        // single quotes need no escaping in double-quoted strings
        write_byte(s, '\'');
        break;

    default:
        // any other escaped byte is emitted without its backslash
        write_byte(s, (unsigned char)c);
        break;
    }
}

// handle_string turns a single-quoted or double-quoted string into a
// double-quoted one; the current byte is the opening quote
void handle_string(j0_maker* s) {
    const int quote = s->current;
    write_byte(s, '"');

    while (true) {
        advance(s);
        int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended before string was close-quoted");
        }

        if (c == '\\') {
            advance(s);
            c = s->current;
            if (c == EOF) {
                fail(s, 1, "input ended before string was close-quoted");
            }
            handle_string_escape(s, c);
            continue;
        }

        if (c == quote) {
            write_byte(s, '"');
            advance(s);
            return;
        }

        if (c == '"') {
            // a double quote inside a single-quoted string must be escaped
            // to keep the double-quoted output valid
            write_byte(s, '\\');
        }
        write_byte(s, (unsigned char)c);
    }
}

void handle_token(j0_maker* s);

// handle_array handles arrays, ignoring extra/trailing commas; the current
// byte is the opening square bracket
void handle_array(j0_maker* s) {
    size_t items_before = 0;
    write_byte(s, '[');
    advance(s);

    while (true) {
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (s->current == ',') {
            advance(s);
            continue;
        }

        if (s->current == ']') {
            write_byte(s, ']');
            advance(s);
            return;
        }

        if (items_before > 0) {
            write_byte(s, ',');
        }
        handle_token(s);
        items_before++;
    }
}

// handle_unquoted_key double-quotes an identifier-like object key; the
// current byte is the key's first byte, already checked by the caller
void handle_unquoted_key(j0_maker* s) {
    write_byte(s, '"');

    while (true) {
        int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended with an object key");
        }

        write_byte(s, (unsigned char)c);
        advance(s);

        c = s->current;
        if (!isalpha(c) && !isdigit(c) && c != '_') {
            break;
        }
    }

    write_byte(s, '"');
}

// handle_object handles objects, ignoring extra/trailing commas, and quoting
// keys as needed; the current byte is the opening curly bracket
void handle_object(j0_maker* s) {
    size_t items_before = 0;
    write_byte(s, '{');
    advance(s);

    while (true) {
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "unclosed object");
        }

        if (s->current == ',') {
            advance(s);
            continue;
        }

        if (s->current == '}') {
            write_byte(s, '}');
            advance(s);
            return;
        }

        if (s->current == '"' || s->current == '\'') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_string(s);
            items_before++;
        } else if (isalpha(s->current) || s->current == '_') {
            if (items_before > 0) {
                write_byte(s, ',');
            }
            handle_unquoted_key(s);
            items_before++;
        } else {
            fail(s, 1, "only strings or identifiers can be object keys");
        }

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after object-key and before value");
        }
        if (s->current != ':') {
            fail(s, 1, "a `:` must follow all object keys");
        }
        write_byte(s, ':');
        advance(s);

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after a `:` following an object-key");
        }
        handle_token(s);
    }
}

// dispatch maps the first byte of a token to the func which handles it:
// entries are null for bytes which can't start tokens
void (*dispatch[256])(j0_maker*) = {0};

// handle_token handles the next value in the input, whatever its type
void handle_token(j0_maker* s) {
    seek_token(s);
    if (s->current == EOF) {
        fail(s, 1, "expected a token");
    }

    // the current byte isn't EOF, so it's a valid index for the table
    const unsigned char c = (unsigned char)s->current;
    void (*fn)(j0_maker*) = dispatch[c];
    if (fn == NULL) {
        fail(s, 1, "invalid token starting with `%c`", c);
        return;
    }
    fn(s);
}

// handle_input converts a whole input into a single line of JSON on stdout
void handle_input(FILE* src) {
    unsigned char ibuf[48 * 1024];
    unsigned char obuf[48 * 1024];

    j0_maker state;
    j0_maker* s = &state;
    s->ibuf = ibuf;
    s->icap = sizeof(ibuf);
    s->obuf = obuf;
    s->ocap = sizeof(obuf);
    restart_state(s, stdout, src);

    if (s->current == EOF) {
        fail(s, 1, "empty input isn't valid JSON");
    }

    handle_token(s);

    // only whitespace and comments are allowed after the top-level value:
    // checking the lookahead is what matters here, since the input stream
    // can reach its end well before all buffered bytes are handled
    seek_token(s);
    if (s->current != EOF) {
        fail(s, 1, "unexpected trailing JSON data");
    }

    write_byte(s, '\n');
    flush(s);
}

// run returns the error code
size_t run(int argc, char** argv) {
    if (argc > 2) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
        return 1;
    }

    // use stdin when not given a filepath, when the path is empty, or is `-`
    const char* fname = (argc > 1) ? argv[1] : "";
    if (fname[0] == 0 || (fname[0] == '-' && fname[1] == 0)) {
        handle_input(stdin);
        return 0;
    }

    FILE* f = fopen(fname, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
        return 1;
    }

    handle_input(f);
    fclose(f);
    return 0;
}

// init_dispatch fills the token-dispatch table: it must be called before
// handling any input
void init_dispatch() {
    for (size_t i = 0; i < 256; i++) {
        dispatch[i] = NULL;
    }

    for (size_t i = '0'; i <= '9'; i++) {
        dispatch[i] = handle_number;
    }

    dispatch['n'] = handle_null;
    dispatch['t'] = handle_true;
    dispatch['f'] = handle_false;
    dispatch['.'] = handle_dot;
    dispatch['+'] = handle_plus_number;
    dispatch['-'] = handle_minus_number;
    dispatch['"'] = handle_string;
    dispatch['\''] = handle_string;
    dispatch['['] = handle_array;
    dispatch['{'] = handle_object;
}

int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given
    if (argc > 1 && argv[1][0] == '-') {
        const char* s = argv[1] + (argv[1][1] == '-' ? 2 : 1);
        if (strcmp(s, "h") == 0 || strcmp(s, "help") == 0) {
            puts(info);
            return 0;
        }
    }

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    init_dispatch();
    return run(argc, argv) == 0 ? 0 : 1;
}