/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running
    cc -Wall -s -O2 -o ./nn ./nn.c
*/

#include <fcntl.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// info is the help message shown when this app is given any of its help
// options: function main checks whether the first command-line argument,
// minus its 1 or 2 leading dashes, is either `h` or `help`, and if so emits
// this text using puts, which adds a final line-feed of its own; note that
// function run also accepts the style-option `invert`, which isn't listed in
// this message
const char* info =
    ""
    "nn [options...] [filepaths...]\n"
    "\n"
    "\n"
    "Nice Numbers is an app which renders the plain text it's given to make long\n"
    "numbers much easier to read, by alternating 3-digit groups which are colored\n"
    "using ANSI-codes with unstyled ones.\n"
    "\n"
    "Unlike the common practice of inserting commas between 3-digit groups, this\n"
    "alternative doesn't widen the original text, keeping any alignments the same.\n"
    "\n"
    "All input is assumed to be UTF-8. When not given any filepaths, input is read\n"
    "from the standard input.\n"
    "\n"
    "\n"
    "Options, all of which can start with either 1 or 2 dashes:\n"
    "\n"
    "\n"
    "  -blue     use a blue-like color to alternate-style runs of digits\n"
    "  -bold     use a bold style/effect to alternate-style runs of digits\n"
    "  -gray     use a gray color to alternate-style runs of digits\n"
    "  -green    use a green color to alternate-style runs of digits\n"
    "  -inverse  invert/swap colors to alternate-style runs of digits\n"
    "  -orange   use an orange color to alternate-style runs of digits\n"
    "  -purple   use a purple color to alternate-style runs of digits\n"
    "  -red      use a red color to alternate-style runs of digits\n"
    "\n"
    "  -h          show this help message\n"
    "  -help       show this help message\n"
    "\n"
    "  -highlight  same as option -inverse\n"
    "  -hilite     same as option -inverse\n"
    "";

// slice is a growable region of bytes in memory: it's used both as an
// owning buffer, which function append_byte reallocates when full, and as a
// plain view into part of such a buffer, which function advance narrows
// from its front
typedef struct slice {
    // ptr is the starting place of the region; for slices made by new_slice
    // this is the pointer later given to free, as long as it's not advanced
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available; only
    // append_byte reads it, so slices used as read-only views don't
    // need it to be meaningful
    size_t cap;
} slice;

// new_slice is the constructor for type slice
slice new_slice(size_t cap) {
    slice res;
    res.cap = cap;
    res.len = 0;
    res.ptr = malloc(res.cap);
    return res;
}

// advance updates a slice so it starts after the number of bytes given: the
// count must not exceed the slice's current length, since that length is
// unsigned and would wrap around; the slice's capacity is left untouched
//
// the function is `static inline`, instead of just `inline`, because since
// C99 a plain inline definition doesn't provide an external one: builds
// where calls to it aren't actually inlined, such as unoptimized ones, would
// then fail to link
static inline void advance(slice* src, size_t n) {
    src->ptr += n;
    src->len -= n;
}

// first gives back a copy of the slice given whose length is the number of
// bytes given: both the start and the capacity are the same as the
// original's, and no check is made against the original's length
slice first(slice src, size_t n) {
    return (slice){
        .ptr = src.ptr,
        .len = n,
        .cap = src.cap,
    };
}

// append_byte does as it says, potentially reallocating the memory area
// backing the slice given: a full slice has its capacity doubled before
// the byte is added to it
//
// when the slice can't grow, either because memory ran out or because its
// capacity can't be doubled without wrapping around, an error message is
// shown on stderr and the app quits with a failure code, instead of writing
// through an invalid pointer
void append_byte(slice* s, unsigned char b) {
    if (s->len >= s->cap) {
        // slice is full, so double it: a capacity of 0 can't grow that way,
        // so it starts from a small fixed size instead
        const size_t cap = s->cap > 0 ? 2 * s->cap : 64;

        // a doubling which wrapped around isn't bigger than the old value;
        // the old pointer is kept until the reallocation is known to work,
        // since realloc leaves the original memory alone when it fails
        unsigned char* ptr = cap > s->cap ? realloc(s->ptr, cap) : NULL;
        if (ptr == NULL) {
            fprintf(stderr, "\x1b[31mout of memory\x1b[0m\n");
            exit(EXIT_FAILURE);
        }

        s->ptr = ptr;
        s->cap = cap;
    }

    // there's room for at least 1 more byte by now
    s->ptr[s->len] = b;
    s->len++;
}

// find_lf scans the slice given from its start, and returns the index of
// its first line-feed byte, or -1 when there isn't any
long long int find_lf(slice s) {
    size_t pos = 0;
    while (pos < s.len && s.ptr[pos] != '\n') {
        pos++;
    }
    return pos < s.len ? (long long int)pos : -1;
}

// find_digit scans the slice given from its start, and returns the index of
// its first ASCII decimal digit, or -1 when there isn't any
long long int find_digit(slice s) {
    size_t pos = 0;
    while (pos < s.len) {
        // bytes below '0' wrap around into big unsigned values, so a single
        // comparison checks both ends of the range '0'..'9'
        if ((unsigned int)(s.ptr[pos] - '0') < 10u) {
            return (long long int)pos;
        }
        pos++;
    }
    return -1;
}

// find_non_digit scans the slice given from its start, and returns the index
// of its first byte which isn't an ASCII decimal digit, or -1 when all its
// bytes are digits, which includes the case of an empty slice
long long int find_non_digit(slice s) {
    size_t pos = 0;
    while (pos < s.len) {
        // bytes below '0' wrap around into big unsigned values, so a single
        // comparison rejects anything outside the range '0'..'9'
        if ((unsigned int)(s.ptr[pos] - '0') >= 10u) {
            return (long long int)pos;
        }
        pos++;
    }
    return -1;
}

// reset_style is the ANSI escape sequence emitted right after each styled
// group of digits, to end its styling; it's an array, instead of a pointer,
// so that `sizeof(reset_style) - 1` is its length without the trailing null
const unsigned char reset_style[] = "\x1b[0m";

// bufreader is a way to speed up reading data by reducing the frequency of
// data reads from a data source, while still allowing reading 1 byte at
// a time: function read_byte refills the buffer using fread each time all
// its bytes have been handed out
typedef struct bufreader {
    // buf is the buffer, (re)filled periodically as needed
    unsigned char* buf;

    // len is how many buffer bytes are being used, out of its max capacity:
    // it's the count returned by the latest fread
    size_t len;

    // cap is the buffer's capacity, or the most bytes it can hold at once
    size_t cap;

    // pos is the current position, up to the current buffer length: it's
    // the index of the next byte to hand out
    size_t pos;

    // src is the data source used to fill the buffer; it's not closed by
    // function close_bufreader
    FILE* src;
} bufreader;

// new_bufreader is the constructor for type bufreader
bufreader new_bufreader(FILE* src, size_t cap) {
    bufreader res;
    res.cap = cap;
    res.len = 0;
    res.pos = 0;
    res.src = src;
    res.buf = malloc(res.cap);
    return res;
}

// close_bufreader deallocates the buffer, leaving the reader with no buffer
// and no buffered bytes; the data source itself isn't closed, so that's
// still up to the caller
void close_bufreader(bufreader* r) {
    unsigned char* old = r->buf;
    r->buf = NULL;
    r->len = 0;
    free(old);
}

// read_byte gives back the next byte from the reader as a value in the
// range 0..255, or the value EOF when fread can't get any more bytes out of
// the data source: check for EOF before using the result as a byte
int read_byte(bufreader* r) {
    if (r->pos >= r->len) {
        // all buffered bytes were handed out, so try to get the next block
        r->pos = 0;
        r->len = fread(r->buf, 1, r->cap, r->src);

        if (r->len == 0) {
            // reached the end of data
            return EOF;
        }
    }

    // inside current chunk
    return r->buf[r->pos++];
}

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; bytes pile up in
// its buffer, and only reach their destination via fwrite, either when the
// buffer is flushed, or when a write is too big to fit in the buffer
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination:
    // it's set when a call to fwrite fails, and the app's main loops check
    // it to quit early
    bool done;
} bufwriter;

// new_bufwriter is the constructor for type bufwriter
bufwriter new_bufwriter(FILE* dst, size_t cap) {
    bufwriter res;
    res.cap = cap;
    res.done = false;
    res.len = 0;
    res.out = dst;
    res.buf = malloc(res.cap);
    return res;
}

// flush empties the buffer, after sending all its pending bytes, if any, to
// their intended destination with a single call to fwrite; when that call
// fails, the writer is marked as done, and the pending bytes are dropped
void flush(bufwriter* w) {
    const size_t pending = w->len;
    // the buffer ends up empty, whether the write below works or not
    w->len = 0;

    if (pending == 0) {
        return;
    }

    if (fwrite(w->buf, pending, 1, w->out) != 1) {
        w->done = true;
    }
}

// close_bufwriter ensures all pending output is sent to its destination,
// then deallocates the buffer; the destination itself isn't closed
void close_bufwriter(bufwriter* w) {
    // pending bytes must go out before their storage goes away
    flush(w);

    unsigned char* old = w->buf;
    w->buf = NULL;
    free(old);
}

// write_bytes does as it says, minimizing the number of calls to fwrite:
// bytes which fit in the buffer's free space, with room to spare, are just
// copied into it; otherwise the buffer is flushed, any capacity-sized chunks
// are sent directly to the destination, and what's left is buffered
//
// when a direct write fails, the writer is marked as done, and the rest of
// the bytes given are dropped
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->len + len >= w->cap) {
        // ensure current buffer bytes go out, before crossing strides: this
        // also leaves the buffer empty
        flush(w);

        // emit all chunks striding beyond/at the buffer's capacity
        while (len >= w->cap) {
            if (fwrite(src, w->cap, 1, w->out) < 1) {
                w->done = true;
                return;
            }
            src += w->cap;
            len -= w->cap;
        }
    }

    // all bytes left, if any, are sure to fit into the buffer by now
    memcpy(w->buf + w->len, src, len);
    w->len += len;
}

// write_byte adds the byte given to the buffer, flushing the buffer first
// when it's already full
void write_byte(bufwriter* w, unsigned char b) {
    const bool full = w->len >= w->cap;
    if (full) {
        // make room by sending out all pending bytes
        flush(w);
    }
    w->buf[w->len++] = b;
}

// restyle_digits emits a run of digits as 3-digit groups which alternate
// between being styled and unstyled, which greatly improves readability,
// and is the only purpose of this app; all bytes given are assumed to be
// decimal digits, and runs with fewer than 4 digits are emitted as given
//
// each styled group is surrounded by the ANSI sequence given, which must be
// null-terminated, and by the sequence to reset styles
void restyle_digits(bufwriter* w, slice digits, const unsigned char* style) {
    const unsigned char* const p = digits.ptr;
    const size_t n = digits.len;

    if (n <= 3) {
        // too short to need any help
        write_bytes(w, p, n);
        return;
    }

    // the first 0..2 digits are what keeps the rest from being a multiple
    // of 3 in length, and are never styled
    const size_t lead = n % 3;
    write_bytes(w, p, lead);

    const size_t style_len = strlen((const char*)style);

    // the first full group is styled only when it follows leading digits
    bool styled = lead > 0;

    for (size_t i = lead; i < n; i += 3) {
        if (styled) {
            write_bytes(w, style, style_len);
        }
        write_bytes(w, p + i, 3);
        if (styled) {
            write_bytes(w, reset_style, sizeof(reset_style) - 1);
        }

        // adjacent groups never share the same look
        styled = !styled;
    }
}

// restyle_line renders the line given, using ANSI-styles to make any long
// numbers in it more legible: all non-digits are emitted as given, while
// runs of digits are handed to restyle_digits, along with the style given;
// it stops early when the writer no longer accepts output
//
// runs of digits which come right after the bytes ESC and `[` are emitted
// as given, since they belong to ANSI sequences already in the input
void restyle_line(bufwriter* w, slice line, const unsigned char* alt_style) {
    while (!w->done && line.len > 0) {
        // indices use the same type the finders return: type `long` is
        // narrower than that on some platforms, such as 64-bit windows
        const long long int i = find_digit(line);
        if (i < 0) {
            // no (more) digits for sure
            write_bytes(w, line.ptr, line.len);
            return;
        }

        // some ANSI-style sequences use 4-digit numbers, which are long
        // enough for this app to mangle
        const unsigned char* p = line.ptr;
        const bool is_ansi = i >= 2 && p[i - 2] == '\x1b' && p[i - 1] == '[';

        // emit line before current digit-run
        write_bytes(w, line.ptr, (size_t)i);
        advance(&line, (size_t)i);

        // see where the digit-run ends: it goes until the end of the line
        // when no non-digit follows it
        const long long int j = find_non_digit(line);

        // start from a copy of the line, so all fields have defined values
        slice run = line;
        run.len = j < 0 ? line.len : (size_t)j;

        // emit styled digit-run... maybe
        if (is_ansi) {
            write_bytes(w, run.ptr, run.len);
        } else {
            restyle_digits(w, run, alt_style);
        }

        // skip right past the end of the digit-run: when the run reached
        // the end of the line, this empties the line and ends the loop
        advance(&line, run.len);
    }
}

/*
The info-message string defined near the top of this file was made by running
the command

awk 'BEGIN { print "const char* info = \"\"" }
     { printf "\"%s\\n\"\n", $0 }
     END { print "\"\";" }' info.txt
*/

// default_digits_style is the ANSI sequence used to style alternate groups
// of digits when no style-option is given: having it here makes it easy to
// change the built-in default style; its current value is the same sequence
// the `gray` option selects
const unsigned char default_digits_style[] = "\x1b[38;5;248m";

// buffer_size is the capacity, in bytes, used for the input buffer, for the
// output buffer, and as the starting capacity of the line buffer; its value
// is trying to be a good one for modern CPU cores
const size_t buffer_size = 32 * 1024;

// handle_reader loops over input lines, restyling all digit-runs as more
// readable `nice numbers`, fulfilling the app's purpose; it reads from the
// source given until its data end, or until the writer given no longer
// accepts output, and it never closes the source
//
// when its buffers can't be allocated, an error message is shown on stderr
// and the app quits with a failure code
void handle_reader(bufwriter* w, FILE* src, const unsigned char* style) {
    bufreader r = new_bufreader(src, buffer_size);
    slice line = new_slice(buffer_size);

    if (r.buf == NULL || line.ptr == NULL) {
        // ensure currently-buffered output shows up before the message
        flush(w);
        fprintf(stderr, "\x1b[31mout of memory\x1b[0m\n");

        close_bufreader(&r);
        free(line.ptr);
        exit(EXIT_FAILURE);
    }

    // prev is the latest byte read; it stays 0 when the input is empty
    unsigned char prev = 0;

    while (!w->done) {
        const int v = read_byte(&r);
        if (v == EOF) {
            // input is over
            break;
        }

        // still more bytes to go
        const unsigned char b = (unsigned char)v;
        prev = b;
        append_byte(&line, b);

        if (b == '\n') {
            // end of line: emit it, line-feed included, and start anew
            restyle_line(w, line, style);
            line.len = 0;
        }
    }

    // don't forget the last line, which is empty when input ends with a lf
    restyle_line(w, line, style);

    // ensure last output line ends with a line-feed since, at least on
    // msys/windows, `less` hangs when lines with millions of symbols
    // don't end with a lf; note that empty inputs also get a line-feed
    if (prev != '\n') {
        write_byte(w, '\n');
    }

    close_bufreader(&r);
    free(line.ptr);
}

// handle_file restyles all data from the file whose name is given, opening
// it in binary mode, and closing it when done; it returns false only when
// the file can't be opened, after showing an error message on stderr
bool handle_file(bufwriter* w, char* fname, const unsigned char* style) {
    FILE* f = fopen(fname, "rb");
    if (f != NULL) {
        handle_reader(w, f, style);
        fclose(f);
        return true;
    }

    // ensure currently-buffered/deferred output shows up right now: not
    // doing so may scramble results in the common case where stdout and
    // stderr are the same, thus confusing users
    flush(w);

    fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
    return false;
}

// run returns the number of errors
size_t run(int argc, char** argv) {
    char* style = (char*)default_digits_style;
    bufwriter w = new_bufwriter(stdout, buffer_size);

    // handle leading options to change the ANSI-style used
    size_t start = 1;
    if (argc > 1 && argv[start][0] == '-') {
        char* s = argv[start] + (argv[start][1] == '-' ? 2 : 1);
        if (strcmp(s, "blue") == 0) {
            style = "\x1b[38;5;26m";
            start++;
        } else if (strcmp(s, "bold") == 0) {
            style = "\x1b[1m";
            start++;
        } else if (strcmp(s, "green") == 0) {
            style = "\x1b[38;5;29m";
            start++;
        } else if (strcmp(s, "gray") == 0) {
            style = "\x1b[38;5;248m";
            start++;
        } else if (strcmp(s, "highlight") == 0) {
            style = "\x1b[7m";
            start++;
        } else if (strcmp(s, "hilite") == 0) {
            style = "\x1b[7m";
            start++;
        } else if (strcmp(s, "inverse") == 0) {
            style = "\x1b[7m";
            start++;
        } else if (strcmp(s, "invert") == 0) {
            style = "\x1b[7m";
            start++;
        } else if (strcmp(s, "orange") == 0) {
            style = "\x1b[38;5;166m";
            start++;
        } else if (strcmp(s, "purple") == 0) {
            style = "\x1b[38;5;99m";
            start++;
        } else if (strcmp(s, "red") == 0) {
            style = "\x1b[31m";
            start++;
        }
    }

    const unsigned char* alt_style = (const unsigned char*)style;

    // use stdin when not given any filepaths
    if ((size_t)argc <= start) {
        handle_reader(&w, stdin, alt_style);
        close_bufwriter(&w);
        return 0;
    }

    size_t errors = 0;
    for (size_t i = start; i < (size_t)argc && !w.done; i++) {
        if (i > start) {
            // put an extra empty line between adjacent outputs
            write_byte(&w, '\n');
        }

        if (!handle_file(&w, argv[i], alt_style)) {
            errors++;
        }
    }

    close_bufwriter(&w);
    return errors;
}

// main is the app's entry point: it shows the help message and quits when
// the first argument is a help option, and otherwise lets function run
// handle all arguments; the exit code is 0 on success, and 1 when function
// run reports any errors
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle any of the help options, if given: these can start with either
    // 1 or 2 dashes, and only count as the first argument
    const char* first_arg = argc > 1 ? argv[1] : "";
    if (first_arg[0] == '-') {
        const char* name = first_arg + 1;
        if (name[0] == '-') {
            name++;
        }

        if (strcmp(name, "h") == 0 || strcmp(name, "help") == 0) {
            puts(info);
            return 0;
        }
    }

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    const size_t errors = run(argc, argv);
    return errors > 0 ? 1 : 0;
}