/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./j0 ./j0.c
*/

#include <ctype.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// info is the help message written to stdout when the first command-line
// argument is one of the help options; it is built from adjacent string
// literals, which the compiler joins into a single string
const char* info = ""
"j0 [options...] [file...]\n"
"\n"
"\n"
"Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.\n"
"Its output is always a single line, which ends with a line-feed.\n"
"\n"
"Besides minimizing bytes, this tool also adapts almost-JSON input into\n"
"valid JSON, since it\n"
"\n"
"    - ignores both rest-of-line and multi-line comments\n"
"    - ignores extra/trailing commas in arrays and objects\n"
"    - turns single-quoted strings/keys into double-quoted strings\n"
"    - double-quotes unquoted object keys\n"
"    - changes \\x 2-hex-digit into \\u 4-hex-digit string-escapes\n"
"\n"
"All options available can either start with a single or a double-dash\n"
"\n"
"    -h        show this help message\n"
"    -help     show this help message\n"
"    -jsonl    emit JSON Lines, when top-level value is an array\n"
"";

// j0_maker holds all the state used while converting a single input: the
// buffered reader, the output stream, the position shown in error messages,
// and the current byte along with a 1-byte lookahead
typedef struct j0_maker {
    FILE* in;            // the stream input chunks are read from
    unsigned char* ibuf; // the input buffer, which is provided by the caller
    size_t ilen; // how many bytes are being used in the input buffer
    size_t icap; // the input buffer's capacity
    size_t ipos; // the current position in the input buffer

    FILE* out; // the stream all output bytes are written to

    size_t line; // the current line, used to show useful error messages
    size_t pos;  // the position in the current line, for error messages

    int current; // the byte being handled, or EOF when input is over
    int next;    // the byte coming right after the current one, or EOF
} j0_maker;

// advance_reader_pos helps func read_byte do its job: it moves the buffer
// position past the byte given, and keeps the line/position counters used
// in error messages up to date
void advance_reader_pos(j0_maker* r, unsigned char b) {
    r->ipos += 1;

    if (b != '\n') {
        r->pos += 1;
        return;
    }

    // a line-feed starts a new line, whose positions restart from 1
    r->line += 1;
    r->pos = 1;
}

// read_byte gives the next input byte, refilling the input buffer from the
// input stream when the current chunk is used up: check its result against
// the value EOF, before using it as a byte
int read_byte(j0_maker* r) {
    if (r->ipos >= r->ilen) {
        // the current chunk is over: try to load the next one
        r->ipos = 0;
        r->ilen = fread(r->ibuf, sizeof(unsigned char), r->icap, r->in);
        if (r->ilen == 0) {
            // reached the end of data
            return EOF;
        }
    }

    const unsigned char b = r->ibuf[r->ipos];
    advance_reader_pos(r, b);
    return b;
}

// advance is used in most of the code, instead of calling read_byte directly:
// the lookahead byte becomes the current one, and a new lookahead is read
void advance(j0_maker* r) {
    const int upcoming = r->next;
    r->next = read_byte(r);
    r->current = upcoming;
}

void fail(j0_maker* s, int code, const char* msg);

// skip_line ignores bytes until right after the next line-feed, or until the
// input ends, whichever comes first
void skip_line(j0_maker* r) {
    do {
        advance(r);
    } while (r->current != EOF && r->current != '\n');

    // also step past the line-feed itself, when there is one
    if (r->current == '\n') {
        advance(r);
    }
}

// skip_multiline_comment ignores bytes until right after the next `*/` pair,
// or until the input ends; the byte which is current on entry isn't checked
void skip_multiline_comment(j0_maker* r) {
    int before = 0;

    for (advance(r); r->current != EOF; advance(r)) {
        if (before == '*' && r->current == '/') {
            // step past the closing slash
            advance(r);
            return;
        }

        before = r->current;
    }
}

// skip_comment ignores either kind of comment, failing when the current
// bytes don't start one
void skip_comment(j0_maker* r) {
    if (r->current != '/') {
        fail(r, 1, "expected a slash to start comments");
    }
    advance(r);

    // the byte after the first slash picks the kind of comment
    switch (r->current) {
        case '/':
            skip_line(r);
            return;

        case '*':
            skip_multiline_comment(r);
            return;

        default:
            fail(r, 1, "expected `//` or `/*` to start comments");
    }
}

// seek_token skips whitespace/control bytes and comments, stopping either at
// the first byte of the next token, or at the end of the input
void seek_token(j0_maker* r) {
    for (;;) {
        const int c = r->current;

        if (c == '/') {
            skip_comment(r);
        } else if (c != EOF && c <= ' ') {
            advance(r);
        } else {
            return;
        }
    }
}

// starts_with_bom checks if the first n bytes given start with the 3-byte
// UTF-8 byte-order mark
bool starts_with_bom(const unsigned char* b, const size_t n) {
    if (n < 3) {
        return false;
    }

    if (b[0] != 0xef) {
        return false;
    }
    if (b[1] != 0xbb) {
        return false;
    }
    return b[2] == 0xbf;
}

// restart_state gets the state given ready to convert input from stream r
// into output for stream w: it resets all counters, loads the current and
// lookahead bytes, and skips a leading UTF-8 byte-order mark, if present;
// the input buffer and its capacity must be already set
void restart_state(j0_maker* s, FILE* w, FILE* r) {
    s->in = r;
    s->out = w;

    s->ilen = 0;
    s->ipos = 0;
    s->line = 1;
    s->pos = 1;

    s->next = EOF;
    s->current = read_byte(s);
    if (s->current == EOF) {
        // empty input: there's nothing to look ahead to
        return;
    }
    s->next = read_byte(s);

    if (!starts_with_bom(s->ibuf, s->ilen)) {
        return;
    }

    // skip all 3 bytes of the leading UTF-8 BOM (byte-order mark)
    size_t left = 3;
    while (left > 0 && s->current != EOF) {
        advance(s);
        left--;
    }
}

// write_bytes emits all the bytes given using a single call to fwrite; when
// writing fails, the app either quits successfully if the output stream
// reports end-of-file, or fails with an error message
void write_bytes(j0_maker* w, const unsigned char* src, size_t len) {
    if (len == 0) {
        return;
    }

    if (fwrite(src, len, 1, w->out) >= 1) {
        return;
    }

    if (feof(w->out)) {
        exit(0);
    }
    fail(w, 1, "failed to write more output");
}

// write_byte emits the single byte given to the output stream; it's declared
// `static inline` because a plain `inline` definition provides no external
// definition in C99 and later, which makes linking fail whenever the
// compiler chooses not to inline a call, such as when optimizations are off
static inline void write_byte(j0_maker* w, unsigned char b) {
    putc(b, w->out);
}

// debug is available to diagnose any bug found: it closes the input when it
// isn't stdin, ends the current output line, shows the current line/position
// followed by the printf-style formatted message given on stderr, and then
// quits the app with exit code 10
void debug(j0_maker* s, const char* fmt, ...) {
    va_list args;
    va_start(args, fmt);

    if (s->in != stdin) {
        fclose(s->in);
    }

    write_byte(s, '\n');

    const unsigned long line = s->line;
    const unsigned long pos = s->pos;
    fprintf(stderr, "\x1b[46m\x1b[37mline %lu, pos %lu: ", line, pos);
    // a va_list must be handed to the v-prefixed variant: giving it to
    // fprintf would treat the list itself as the first formatted argument
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\x1b[0m\n");

    va_end(args);

    exit(10);
}

// fail quits this app with the exit code given, after closing the input when
// it isn't stdin, ending the current output line, and showing the plain
// (not printf-style) message given on stderr, along with the current
// line/position
void fail(j0_maker* s, int code, const char* msg) {
    FILE* src = s->in;
    if (src != stdin) {
        fclose(src);
    }

    write_byte(s, '\n');

    fprintf(
        stderr, "\x1b[31mline %lu, pos %lu: %s\x1b[0m\n",
        (unsigned long)s->line, (unsigned long)s->pos, msg
    );

    exit(code);
}

// demand_keyword checks if the input, starting from the current byte, matches
// all bytes of the keyword given, advancing past each byte matched; it stops
// at the first mismatch, or when the input ends too early, which means bytes
// matched before that have already been consumed
bool demand_keyword(j0_maker* s, const char* rest) {
    // keywords are always given as string literals, hence the const pointer
    for (; rest[0] != 0; rest++) {
        if (s->current == EOF || s->current != rest[0]) {
            return false;
        }
        advance(s);
    }

    // getting here means every keyword byte was matched
    return true;
}

// handle_null demands the keyword `null`, and emits it as is
void handle_null(j0_maker* s) {
    static const unsigned char out[] = "null";

    const bool matched = demand_keyword(s, "null");
    if (!matched) {
        fail(s, 1, "expected `null` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_true demands the keyword `true`, and emits it as is
void handle_true(j0_maker* s) {
    static const unsigned char out[] = "true";

    const bool matched = demand_keyword(s, "true");
    if (!matched) {
        fail(s, 1, "expected `true` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_false demands the keyword `false`, and emits it as is
void handle_false(j0_maker* s) {
    static const unsigned char out[] = "false";

    const bool matched = demand_keyword(s, "false");
    if (!matched) {
        fail(s, 1, "expected `false` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_capital_none demands the keyword `None`, emitting `null` in its
// place
void handle_capital_none(j0_maker* s) {
    static const unsigned char out[] = "null";

    const bool matched = demand_keyword(s, "None");
    if (!matched) {
        fail(s, 1, "expected `None` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_capital_true demands the keyword `True`, emitting `true` in its
// place
void handle_capital_true(j0_maker* s) {
    static const unsigned char out[] = "true";

    const bool matched = demand_keyword(s, "True");
    if (!matched) {
        fail(s, 1, "expected `True` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_capital_false demands the keyword `False`, emitting `false` in its
// place
void handle_capital_false(j0_maker* s) {
    static const unsigned char out[] = "false";

    const bool matched = demand_keyword(s, "False");
    if (!matched) {
        fail(s, 1, "expected `False` keyword");
    }

    // the size of the array includes the trailing null byte
    write_bytes(s, out, sizeof(out) - 1);
}

// handle_digits copies a run of 1 or more decimal digits to the output,
// failing when the current byte isn't a digit
void handle_digits(j0_maker* s) {
    if (!isdigit(s->current)) {
        fail(s, 1, "expected/missing digits");
    }

    for (; isdigit(s->current); advance(s)) {
        write_byte(s, s->current);
    }
}

// handle_number emits an unsigned number which starts with a digit: its
// integer digits, an optional fraction, and an optional exponent, in that
// order; a decimal dot followed by no digits is given a `0` to keep the
// output valid JSON, and a plus sign in the exponent is dropped, since it's
// redundant
void handle_number(j0_maker* s) {
    handle_digits(s);

    if (s->current == '.') {
        write_byte(s, '.');
        advance(s);

        if (isdigit(s->current)) {
            handle_digits(s);
        } else {
            write_byte(s, '0');
        }
        // don't return here: an exponent can still follow the fraction,
        // as in `1.5e3`
    }

    if (s->current == 'e' || s->current == 'E') {
        write_byte(s, s->current);
        advance(s);

        if (s->current == '+') {
            advance(s);
        } else if (s->current == '-') {
            write_byte(s, '-');
            advance(s);
        }

        handle_digits(s);
    }
}

// handle_dot emits numbers which start with a decimal dot, giving them the
// leading `0` valid JSON needs; only the digits right after the dot are
// handled here
void handle_dot(j0_maker* s) {
    write_byte(s, '0');
    write_byte(s, '.');
    advance(s);

    if (isdigit(s->current)) {
        handle_digits(s);
        return;
    }

    fail(s, 1, "expected/missing digits after decimal dot");
}

// handle_plus_number emits numbers which start with a plus sign, dropping
// the sign itself
void handle_plus_number(j0_maker* s) {
    // skip the plus sign
    advance(s);

    if (s->current != '.') {
        handle_number(s);
        return;
    }
    handle_dot(s);
}

// handle_minus_number emits numbers which start with a minus sign, keeping
// the sign
void handle_minus_number(j0_maker* s) {
    write_byte(s, '-');
    advance(s);

    if (s->current != '.') {
        handle_number(s);
        return;
    }
    handle_dot(s);
}

// copy_hex_digits reads the next `count` input bytes, failing unless each of
// them is a hexadecimal digit, and copies them to the output unchanged
static void copy_hex_digits(j0_maker* s, size_t count) {
    for (size_t i = 0; i < count; i++) {
        advance(s);
        if (s->current == EOF) {
            fail(s, 1, "end of input before end of string");
        }
        if (!isxdigit(s->current)) {
            fail(s, 1, "invalid hexadecimal digit in string");
        }
        // emit the digit just read, not the escape letter before it
        write_byte(s, s->current);
    }
}

// handle_string_escape emits the escape sequence whose byte right after the
// backslash is c, which is also the current byte:
//
//     - standard single-letter JSON escapes are kept as they are
//     - \u escapes are kept, after checking their 4 hex digits
//     - \x escapes with 2 hex digits turn into \u00 with those same digits
//     - escaped single-quotes lose their backslash
//     - any other escaped byte is emitted without its backslash
void handle_string_escape(j0_maker* s, int c) {
    switch (c) {
        case '"':
        case '\\':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
            write_byte(s, '\\');
            write_byte(s, c);
            break;

        case 'u':
            write_byte(s, '\\');
            write_byte(s, 'u');
            copy_hex_digits(s, 4);
            break;

        case 'x':
            write_byte(s, '\\');
            write_byte(s, 'u');
            write_byte(s, '0');
            write_byte(s, '0');
            copy_hex_digits(s, 2);
            break;

        case '\'':
            write_byte(s, '\'');
            break;

        default:
            write_byte(s, c);
            break;
    }
}

// handle_string emits the string/key starting at the current byte, which is
// its opening quote, either single or double: the output is always
// double-quoted, and the input is left right after the closing quote;
// it fails when the input ends before the string is closed
void handle_string(j0_maker* s) {
    const int quote = s->current;
    bool escaped = false;

    write_byte(s, '"');

    while (true) {
        advance(s);

        const int c = s->current;
        if (c == EOF) {
            fail(s, 1, "input ended before string was close-quoted");
        }

        if (escaped) {
            handle_string_escape(s, c);
            escaped = false;
            continue;
        }

        if (c == '\\') {
            // the next byte decides what the whole escape sequence is
            escaped = true;
            continue;
        }

        if (c == quote) {
            write_byte(s, '"');
            advance(s);
            return;
        }

        if (c == '"') {
            // a bare double-quote can only show up in single-quoted input,
            // and would end the double-quoted output early unless escaped
            write_byte(s, '\\');
        }
        write_byte(s, c);
    }
}

void handle_token(j0_maker* s);

// handle_array emits the array starting at the current byte, which is its
// opening square bracket: any commas in the input are ignored, and single
// commas are emitted between items; it fails when the input ends before
// the array is closed
void handle_array(j0_maker* s) {
    bool need_comma = false;

    write_byte(s, '[');
    advance(s);

    for (;;) {
        seek_token(s);
        const int c = s->current;

        if (c == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (c == ',') {
            advance(s);
            continue;
        }

        if (c == ']') {
            write_byte(s, ']');
            advance(s);
            return;
        }

        if (need_comma) {
            write_byte(s, ',');
        }
        handle_token(s);
        need_comma = true;
    }
}

// handle_array_jsonl is a variation of func handle_array, used for top-level
// arrays when running in JSON Lines mode: it emits no square brackets, and
// follows each item with a line-feed, instead of putting commas between
// items; empty arrays result in no output at all
void handle_array_jsonl(j0_maker* s) {
    bool wrote_item = false;

    advance(s);

    for (;;) {
        seek_token(s);
        const int c = s->current;

        if (c == EOF) {
            fail(s, 1, "unclosed array");
        }

        if (c == ',') {
            advance(s);
            continue;
        }

        // end the line of the previous item: this comes before checking
        // for the closing bracket, so the last item also gets a line-feed
        if (wrote_item) {
            write_byte(s, '\n');
        }

        if (c == ']') {
            advance(s);
            return;
        }

        handle_token(s);
        wrote_item = true;
    }
}

// handle_unquoted_key emits the identifier-like object key starting at the
// current byte as a double-quoted string: the first byte is always copied,
// followed by any run of letters, digits, and underscores
void handle_unquoted_key(j0_maker* s) {
    write_byte(s, '"');

    do {
        if (s->current == EOF) {
            fail(s, 1, "input ended with an object key");
        }

        write_byte(s, s->current);
        advance(s);
    } while (
        isalpha(s->current) || isdigit(s->current) || s->current == '_'
    );

    write_byte(s, '"');
}

// handle_object emits the object starting at the current byte, which is its
// opening curly brace: keys can be strings using either kind of quote, or
// unquoted identifiers, and each must be followed by a colon and a value;
// any commas in the input are ignored, and single commas are emitted between
// key-value pairs
void handle_object(j0_maker* s) {
    bool need_comma = false;

    write_byte(s, '{');
    advance(s);

    for (;;) {
        seek_token(s);
        const int c = s->current;

        if (c == EOF) {
            fail(s, 1, "unclosed object");
        }

        if (c == ',') {
            advance(s);
            continue;
        }

        if (c == '}') {
            write_byte(s, '}');
            advance(s);
            return;
        }

        // figure out the kind of key, before emitting anything for it
        const bool quoted = (c == '"' || c == '\'');
        const bool bare = !quoted && (isalpha(c) || c == '_');
        if (!quoted && !bare) {
            fail(s, 1, "only strings or identifiers can be object keys");
        }

        if (need_comma) {
            write_byte(s, ',');
        }
        if (quoted) {
            handle_string(s);
        } else {
            handle_unquoted_key(s);
        }
        need_comma = true;

        // a colon must come between the key and its value
        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after object-key and before value");
        }
        if (s->current != ':') {
            fail(s, 1, "a `:` must follow all object keys");
        }
        write_byte(s, ':');
        advance(s);

        seek_token(s);
        if (s->current == EOF) {
            fail(s, 1, "input ended after a `:` following an object-key");
        }
        handle_token(s);
    }
}

// dispatch ties leading bytes/chars in tokens to the funcs which handle them:
// all entries start as null pointers, since objects with static storage are
// zero-initialized, and func main sets the ones for bytes which can start
// tokens; the parameter type is spelled out because an empty parameter list
// means `no parameters` as of C23, which doesn't match the handler funcs
void (*dispatch[256])(struct j0_maker*) = {NULL};

// handle_token emits the value starting at the current byte, using the
// dispatch table to pick the func which handles it; it fails when the
// current byte can't start any value, or when the input is already over
void handle_token(j0_maker* s) {
    const int c = s->current;

    // EOF is negative, so it must never be used to index the table
    if (c == EOF) {
        fail(s, 1, "expected a token");
    }

    void (*fn)(j0_maker*) = dispatch[c];
    if (fn != NULL) {
        fn(s);
        return;
    }

    // show the offending byte on its own line, before the error message
    fprintf(stderr, "%c\n", (unsigned char)c);
    fail(s, 1, "invalid token");
}

void handle_array_jsonl(j0_maker* s);

// handle_input converts the whole input stream given into minimal JSON on
// stdout: when jsonl is true and the top-level value is an array, each of
// its items is emitted on its own line instead; it quits the app with an
// error when the input is empty, invalid, or has data after its top-level
// value, except for comments, whitespace, and a single trailing semicolon
void handle_input(FILE* src, bool jsonl) {
    unsigned char ibuf[32 * 1024];

    j0_maker state;
    j0_maker* s = &state;
    s->ibuf = ibuf;
    s->icap = sizeof(ibuf);
    restart_state(s, stdout, src);

    // ignore leading whitespace/comment bytes, if present
    seek_token(s);

    if (s->current == EOF) {
        fail(s, 1, "empty input isn't valid JSON");
    }

    if (jsonl && s->current == '[') {
        handle_array_jsonl(s);
    } else {
        handle_token(s);
        write_byte(s, '\n');
    }

    // ignore trailing whitespace/comment bytes, if present
    seek_token(s);

    // ignore trailing semicolon, if present
    if (s->current == ';') {
        advance(s);
        // ignore trailing whitespace/comment bytes, if present
        seek_token(s);
    }

    // check the current byte, instead of the stream's end-of-file flag:
    // that flag is already set as soon as the last chunk is loaded into the
    // buffer, even when there are unhandled bytes left in it
    if (s->current != EOF) {
        fail(s, 1, "unexpected trailing JSON data");
    }
}

// is_help_option checks if the string given is any of the help options,
// which can start with either 1 or 2 dashes
bool is_help_option(const char* s) {
    if (s[0] != '-' || s[1] == 0) {
        return false;
    }

    // skip the leading dash, along with a second one right after it
    const char* name = (s[1] == '-') ? s + 2 : s + 1;
    return strcmp(name, "h") == 0 || strcmp(name, "help") == 0;
}

// is_jsonl_option checks if the string given is the option to emit JSON
// Lines, which can start with either 1 or 2 dashes
bool is_jsonl_option(const char* s) {
    if (s[0] != '-' || s[1] == 0) {
        return false;
    }

    // skip the leading dash, along with a second one right after it
    const char* name = (s[1] == '-') ? s + 2 : s + 1;
    return strcmp(name, "jsonl") == 0;
}

// run handles the command-line arguments, and converts either the file named
// or the standard input; it returns the error code, which is 0 on success:
// errors in the input itself quit the app directly instead
int run(int argc, char** argv) {
    // the JSON Lines option is only recognized as the first argument
    const bool jsonl = argc > 1 && is_jsonl_option(argv[1]);
    if (jsonl) {
        argc--;
        argv++;
    }

    if (argc > 2) {
        const char* msg = "can't use more than 1 named input";
        fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
        return 1;
    }

    // use stdin when not given a filepath, or is `-`
    const char* path = (argc < 2) ? NULL : argv[1];
    if (path == NULL || path[0] == 0 || strcmp(path, "-") == 0) {
        handle_input(stdin, jsonl);
        return 0;
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return 1;
    }

    handle_input(f, jsonl);
    fclose(f);

    return 0;
}

// main shows the help message when asked to, otherwise it sets up the
// dispatch table for all bytes which can start values, and runs the app
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    if (argc > 1 && is_help_option(argv[1])) {
        puts(info);
        return 0;
    }

    // numbers: decimal digits are always consecutive in C charsets
    for (int digit = '0'; digit <= '9'; digit++) {
        dispatch[digit] = handle_number;
    }
    dispatch['.'] = handle_dot;
    dispatch['+'] = handle_plus_number;
    dispatch['-'] = handle_minus_number;

    // keywords, including the capitalized ones python uses
    dispatch['n'] = handle_null;
    dispatch['t'] = handle_true;
    dispatch['f'] = handle_false;
    dispatch['N'] = handle_capital_none;
    dispatch['T'] = handle_capital_true;
    dispatch['F'] = handle_capital_false;

    // strings and containers
    dispatch['"'] = handle_string;
    dispatch['\''] = handle_string;
    dispatch['['] = handle_array;
    dispatch['{'] = handle_object;

    const int code = run(argc, argv);
    return (code != 0) ? 1 : 0;
}