File: debase64.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./debase64 ./debase64.c
  29 */
  30 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif
  39 
  40 #ifdef RED_ERRORS
  41 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  42 #ifdef __APPLE__
  43 #define ERROR_STYLE "\x1b[31m"
  44 #endif
  45 #define RESET_STYLE "\x1b[0m"
  46 #else
  47 #define ERROR_STYLE
  48 #define RESET_STYLE
  49 #endif
  50 
  51 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  52 
  53 #ifndef IBUF_SIZE
  54 #define IBUF_SIZE (32 * 1024)
  55 #endif
  56 
  57 #ifndef OBUF_SIZE
  58 #define OBUF_SIZE (8 * 1024)
  59 #endif
  60 
  61 const char* info = ""
  62 "debase64 [options...] [filename...]\n"
  63 "\n"
  64 "\n"
  65 "Decode base64-encoded data: these include data-URIs, which start with a\n"
  66 "MIME declaration before their base64 payload starts.\n"
  67 "\n"
  68 "\n"
  69 "Options\n"
  70 "\n"
  71 "    -h, -help, --h, --help              show this help message\n"
  72 "";
  73 
  74 const char* stdin_name = "<stdin>";
  75 
  76 // bufwriter is, as the name implies, a buffered-writer: when it's aimed at
  77 // stdout, it considerably speeds up this app, as intended
  78 typedef struct bufwriter {
  79     // buf is the buffer proper
  80     unsigned char* buf;
  81 
  82     // len is how many bytes of the buffer are currently being used
  83     size_t len;
  84 
  85     // cap is the capacity of the buffer, or the most bytes it can hold
  86     size_t cap;
  87 
  88     // out is the destination of all that's written into the buffer
  89     FILE* out;
  90 } bufwriter;
  91 
  92 void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
  93     w->buf = b;
  94     w->len = 0;
  95     w->cap = cap;
  96     w->out = out;
  97 }
  98 
  99 void write_byte(bufwriter* w, unsigned char b) {
 100     if (w->len < w->cap) {
 101         w->buf[w->len++] = b;
 102         return;
 103     }
 104 
 105     fwrite(w->buf, 1, w->cap, w->out);
 106     w->buf[0] = b;
 107     w->len = 1;
 108 }
 109 
 110 void flush(bufwriter* w) {
 111     if (w->len > 0) {
 112         fwrite(w->buf, 1, w->len, w->out);
 113     }
 114     w->len = 0;
 115     fflush(w->out);
 116 }
 117 
 118 bool match_lead(unsigned char* buf, size_t n, char* to) {
 119     for (; n > 0 && *to != 0; buf++, to++, n--) {
 120         if (*buf != *to) {
 121             return false;
 122         }
 123     }
 124     return true;
 125 }
 126 
 127 size_t skip_data_uri(unsigned char* buf, size_t n) {
 128     for (size_t i = 0; i < n; i++) {
 129         if (match_lead(buf + i, n - i, ";base64,")) {
 130             return i + (sizeof(";base64,") - 1);
 131         }
 132     }
 133     return 0;
 134 }
 135 
 136 // INVALID signals an input byte isn't allowed in a base64 stream
 137 #define INVALID 0xff
 138 
 139 const unsigned char base64_rev_lookup[256] = {
 140     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 141     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 142     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 143     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 144     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 145     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 146     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 147     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 148     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 149     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 150     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 151     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 152     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 153     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 154     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 155     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 156     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 157     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 158     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 159     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 160     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 161     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 162     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 163     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 164     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 165     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 166     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 167     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 168     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 169     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 170     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 171     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 172 };
 173 
 174 // unsigned char rev_lookup_byte(unsigned char b) {
 175 //     if ('A' <= b && b <= 'Z') {
 176 //         return b - 'A';
 177 //     }
 178 //     if ('a' <= b && b <= 'z') {
 179 //         return (b - 'a') + 26;
 180 //     }
 181 //     if ('0' <= b && b <= '9') {
 182 //         return (b - '0') + 52;
 183 //     }
 184 //
 185 //     switch (b) {
 186 //     case '+':
 187 //         return 62;
 188 //     case '/':
 189 //         return 63;
 190 //     default:
 191 //         return INVALID;
 192 //     }
 193 // }
 194 
 195 unsigned char rev_lookup_byte(unsigned char b) {
 196     return base64_rev_lookup[b];
 197 }
 198 
 199 void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
 200     const char* msg = "invalid base64 data";
 201     const char* fmt = ERROR_LINE("%s (byte %d, line: %ld, pos: %ld)");
 202     fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
 203 }
 204 
 205 bool handle_reader(bufwriter* w, FILE* src, const char* path) {
 206     unsigned char buf[IBUF_SIZE];
 207 
 208     uint64_t line = 1;
 209     uint64_t pos = 1;
 210     uint64_t payload = 0;
 211     uint64_t padding = 0;
 212 
 213     unsigned char quad[4];
 214     quad[0] = 0;
 215     quad[1] = 0;
 216     quad[2] = 0;
 217     quad[3] = 0;
 218 
 219     while (!feof(w->out)) {
 220         size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 221         if (n < 1) {
 222             // assume input is over when no bytes were read
 223             break;
 224         }
 225 
 226         unsigned char* chunk = buf;
 227 
 228         // skip leading utf-8 byte-order-mark bytes, if present
 229         if (payload == 0 && n >= 3 && match_lead(buf, n, "\xef\xbb\xbf")) {
 230             chunk += 3;
 231             n -= 3;
 232         }
 233 
 234         // skip leading data-URI prelude, if present
 235         if (payload == 0 && match_lead(buf, n, "data:")) {
 236             const int skip = skip_data_uri(buf, n);
 237             chunk += skip;
 238             n -= skip;
 239         }
 240 
 241         for (size_t i = 0; i < n; i++) {
 242             const unsigned char v = chunk[i];
 243 
 244             // ignore carriage-returns to support CRLF lines
 245             if (v == '\r') {
 246                 continue;
 247             }
 248 
 249             // base64 streams can span multiple lines
 250             if (v == '\n') {
 251                 line++;
 252                 pos = 1;
 253                 continue;
 254             }
 255 
 256             pos++;
 257 
 258             if (v == '=') {
 259                 padding++;
 260                 continue;
 261             }
 262 
 263             if (padding > 0 && v != '=') {
 264                 write_byte(w, '\n');
 265                 const char* msg = "equal signs are only valid at the end";
 266                 const char* fmt = ERROR_LINE("%s (line %ld, pos %ld)");
 267                 fprintf(stderr, fmt, msg, (long)line, (long)pos);
 268                 return false;
 269             }
 270 
 271             unsigned char b = rev_lookup_byte(v);
 272 
 273             if (b == INVALID) {
 274                 show_invalid_byte(v, line, pos);
 275                 return false;
 276             }
 277 
 278             const size_t step = payload % 4;
 279             quad[step] = b;
 280             payload++;
 281 
 282             if (step == 3) {
 283                 // 01234567 01234567 01234567 01234567
 284                 // 00000000 11111111 22222222 33333333
 285                 // xx000000 xx001111 xx111122 xx222222
 286                 write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 287                 write_byte(w, (quad[1] << 4) | (quad[2] >> 2));
 288                 write_byte(w, (quad[2] << 6) | (quad[3] >> 0));
 289             }
 290         }
 291     }
 292 
 293     // try to be resilient to missing trailing/padding equals
 294     // if (padding == 0 && payload > 0) {
 295     //     padding = 4 - (payload % 4);
 296     // }
 297 
 298     // don't forget unemitted trailing bytes, if any
 299     switch (padding) {
 300     case 1:
 301         write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 302         write_byte(w, (quad[1] << 4) | (quad[2] >> 2));
 303         break;
 304     case 2:
 305         write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 306         break;
 307     }
 308 
 309     flush(w);
 310     return true;
 311 }
 312 
 313 // handle_file handles data from the filename given; returns false only when
 314 // an error happened
 315 bool handle_file(bufwriter* w, const char* path) {
 316     // a `-` filename stands for the standard input
 317     if (path[0] == '-' && path[1] == 0) {
 318         return handle_reader(w, stdin, stdin_name);
 319     }
 320 
 321     FILE* f = fopen(path, "rb");
 322     if (f == NULL) {
 323         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 324         return false;
 325     }
 326 
 327     const bool ok = handle_reader(w, f, path);
 328     fclose(f);
 329     return ok;
 330 }
 331 
 332 // is_help_option simplifies control-flow for func run
 333 bool is_help_option(const char* s) {
 334     return s[0] == '-' && (
 335         strcmp(s, "-h") == 0 ||
 336         strcmp(s, "-help") == 0 ||
 337         strcmp(s, "--h") == 0 ||
 338         strcmp(s, "--help") == 0
 339     );
 340 }
 341 
 342 int main(int argc, char** argv) {
 343 #ifdef _WIN32
 344     setmode(fileno(stdin), O_BINARY);
 345     // ensure output lines end in LF instead of CRLF on windows
 346     setmode(fileno(stdout), O_BINARY);
 347     setmode(fileno(stderr), O_BINARY);
 348 #endif
 349 
 350     // emit first-step byte-decoding table for base64 symbols;
 351     // who needs scripts/interpreters when you have compilers?
 352 
 353     // for (unsigned int i = 0; i < 256; i++) {
 354     //     if (i % 8 == 0) {
 355     //         fprintf(stdout, "    ");
 356     //     }
 357     //     fprintf(stdout, "0x%02x,", rev_lookup_byte(i));
 358     //     fprintf(stdout, (i % 8 == 7 && i > 0) ? "\n" : " ");
 359     // }
 360     // return 0;
 361 
 362     if (argc > 1 && is_help_option(argv[1])) {
 363         printf("%s", info);
 364         return 0;
 365     }
 366 
 367     if (argc > 2) {
 368         fprintf(stderr, ERROR_LINE("multiple files not allowed"));
 369         return 1;
 370     }
 371 
 372     // enable full/block-buffering for standard output
 373     // setvbuf(stdout, NULL, _IOFBF, 0);
 374 
 375     unsigned char outbuf[OBUF_SIZE];
 376     bufwriter bw;
 377     init_bufwriter(&bw, stdout, outbuf, sizeof(outbuf));
 378 
 379     const char* name = (argc < 2) ? "-" : argv[1];
 380     const int res = handle_file(&bw, name) ? 0 : 1;
 381     flush(&bw);
 382     return res;
 383 }