File: debase64.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./debase64 ./debase64.c
  29 */
  30 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif
  39 
  40 #ifdef RED_ERRORS
  41 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  42 #ifdef __APPLE__
  43 #define ERROR_STYLE "\x1b[31m"
  44 #endif
  45 #define RESET_STYLE "\x1b[0m"
  46 #else
  47 #define ERROR_STYLE
  48 #define RESET_STYLE
  49 #endif
  50 
  51 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  52 
  53 #ifndef IBUF_SIZE
  54 #define IBUF_SIZE (32 * 1024)
  55 #endif
  56 
  57 #ifndef OBUF_SIZE
  58 #define OBUF_SIZE (8 * 1024)
  59 #endif
  60 
  61 const char* info = ""
  62 "debase64 [options...] [filename...]\n"
  63 "\n"
  64 "Decode base64-encoded data: these include data-URIs, which start with a\n"
  65 "MIME declaration before their base64 payload starts.\n"
  66 "\n"
  67 "Options\n"
  68 "\n"
  69 "    -h, -help, --h, --help              show this help message\n"
  70 "";
  71 
// stdin_name is the name handed to func handle_reader, in place of a file
// path, when the data come from the standard input
const char* stdin_name = "<stdin>";
  73 
// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; bytes pile up in
// a buffer provided via func init_bufwriter, and are only sent to the
// destination stream when the buffer is full, or when func flush is called
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;
} bufwriter;
  89 
  90 void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
  91     w->buf = b;
  92     w->len = 0;
  93     w->cap = cap;
  94     w->out = out;
  95 }
  96 
  97 static inline void write_byte(bufwriter* w, unsigned char b) {
  98     if (w->len < w->cap) {
  99         w->buf[w->len++] = b;
 100         return;
 101     }
 102 
 103     fwrite(w->buf, 1, w->cap, w->out);
 104     w->buf[0] = b;
 105     w->len = 1;
 106 }
 107 
 108 void flush(bufwriter* w) {
 109     if (w->len > 0) {
 110         fwrite(w->buf, 1, w->len, w->out);
 111     }
 112     w->len = 0;
 113     fflush(w->out);
 114 }
 115 
 116 bool match_lead(unsigned char* buf, size_t n, char* to) {
 117     for (; n > 0 && *to != 0; buf++, to++, n--) {
 118         if (*buf != *to) {
 119             return false;
 120         }
 121     }
 122     return true;
 123 }
 124 
 125 size_t skip_data_uri(unsigned char* buf, size_t n) {
 126     for (size_t i = 0; i < n; i++) {
 127         if (match_lead(buf + i, n - i, ";base64,")) {
 128             return i + (sizeof(";base64,") - 1);
 129         }
 130     }
 131     return 0;
 132 }
 133 
 134 // INVALID signals an input byte isn't allowed in a base64 stream
 135 #define INVALID 0xff
 136 
 137 const unsigned char base64_rev_lookup[256] = {
 138     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 139     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 140     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 141     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 142     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 143     0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
 144     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
 145     0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 146     0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
 147     0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
 148     0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
 149     0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
 150     0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 151     0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
 152     0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
 153     0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
 154     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 155     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 156     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 157     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 158     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 159     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 160     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 161     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 162     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 163     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 164     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 165     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 166     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 167     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 168     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 169     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 170 };
 171 
 172 // unsigned char rev_lookup_byte(unsigned char b) {
 173 //     if ('A' <= b && b <= 'Z') {
 174 //         return b - 'A';
 175 //     }
 176 //     if ('a' <= b && b <= 'z') {
 177 //         return (b - 'a') + 26;
 178 //     }
 179 //     if ('0' <= b && b <= '9') {
 180 //         return (b - '0') + 52;
 181 //     }
 182 //
 183 //     switch (b) {
 184 //     case '+':
 185 //         return 62;
 186 //     case '/':
 187 //         return 63;
 188 //     default:
 189 //         return INVALID;
 190 //     }
 191 // }
 192 
 193 void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
 194     const char* msg = "invalid base64 data";
 195     const char* fmt = ERROR_LINE("%s (byte %d, line: %ld, pos: %ld)");
 196     fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
 197 }
 198 
 199 bool handle_reader(bufwriter* w, FILE* src, const char* path) {
 200     unsigned char buf[IBUF_SIZE];
 201 
 202     size_t line = 1;
 203     size_t pos = 1;
 204     size_t payload = 0;
 205     size_t padding = 0;
 206 
 207     unsigned char quad[4];
 208     quad[0] = 0;
 209     quad[1] = 0;
 210     quad[2] = 0;
 211     quad[3] = 0;
 212 
 213     unsigned char prev = 0;
 214 
 215     while (!feof(w->out)) {
 216         size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 217         if (n < 1) {
 218             // assume input is over when no bytes were read
 219             break;
 220         }
 221 
 222         unsigned char* chunk = buf;
 223 
 224         // skip leading utf-8 byte-order-mark bytes, if present
 225         if (payload == 0 && n >= 3 && match_lead(buf, n, "\xef\xbb\xbf")) {
 226             chunk += 3;
 227             n -= 3;
 228         }
 229 
 230         // skip leading data-URI prelude, if present
 231         if (payload == 0 && match_lead(buf, n, "data:")) {
 232             const int skip = skip_data_uri(buf, n);
 233             chunk += skip;
 234             n -= skip;
 235         }
 236 
 237         for (size_t i = 0; i < n; i++) {
 238             const unsigned char v = chunk[i];
 239             const unsigned char b = base64_rev_lookup[v];
 240 
 241             if (padding > 0 && prev == '=') {
 242                 if (v != '\r' && v != '\n' && v != '=') {
 243                     const char* fmt = ERROR_LINE("payload after padding");
 244                     fprintf(stderr, fmt);
 245                     return false;
 246                 }
 247             }
 248 
 249             prev = v;
 250 
 251             if (b == INVALID) {
 252                 // base64 streams can span multiple lines
 253                 if (v == '\n') {
 254                     line++;
 255                     pos = 1;
 256                     continue;
 257                 }
 258 
 259                 // ignore carriage-returns to support CRLF-type lines
 260                 if (v == '\r') {
 261                     continue;
 262                 }
 263 
 264                 if (v == '=') {
 265                     padding++;
 266                     continue;
 267                 }
 268 
 269                 show_invalid_byte(v, line, pos);
 270                 return false;
 271             }
 272 
 273             pos++;
 274 
 275             const size_t step = payload % 4;
 276             quad[step] = b;
 277             payload++;
 278 
 279             if (step == 3) {
 280                 // 01234567 01234567 01234567 01234567
 281                 // 00000000 11111111 22222222 33333333
 282                 // xx000000 xx001111 xx111122 xx222222
 283                 write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 284                 write_byte(w, (quad[1] << 4) | (quad[2] >> 2));
 285                 write_byte(w, (quad[2] << 6) | (quad[3] >> 0));
 286             }
 287         }
 288 
 289         // don't bother with rest of input when padding is clearly wrong
 290         if (padding > 2) {
 291             break;
 292         }
 293     }
 294 
 295     // try to be resilient to missing trailing/padding equals
 296     // if (padding == 0 && payload > 0) {
 297     //     padding = 4 - (payload % 4);
 298     // }
 299 
 300     if (padding > 2 || (padding > 0 && payload == 0)) {
 301         const char* fmt = ERROR_LINE("excessive padding");
 302         fprintf(stderr, fmt);
 303         return false;
 304     }
 305 
 306     const size_t step = payload % 4;
 307 
 308     // a single base64 byte on its own is worth only 6 bits
 309     if (step == 1) {
 310         const char* fmt = ERROR_LINE("missing final bytes");
 311         fprintf(stderr, fmt);
 312         return false;
 313     }
 314 
 315     const bool a = (step == 0 && padding != 0);
 316     const bool b = (step == 2 && padding != 2);
 317     const bool c = (step == 3 && padding != 1);
 318     if (a || b || c) {
 319         const char* fmt = ERROR_LINE("bad padding");
 320         fprintf(stderr, fmt);
 321         return false;
 322     }
 323 
 324     // don't forget unemitted trailing bytes, if any
 325     switch (padding) {
 326     case 1:
 327         write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 328         write_byte(w, (quad[1] << 4) | (quad[2] >> 2));
 329         break;
 330 
 331     case 2:
 332         write_byte(w, (quad[0] << 2) | (quad[1] >> 4));
 333         break;
 334     }
 335 
 336     flush(w);
 337     return true;
 338 }
 339 
 340 // handle_file handles data from the filename given; returns false only when
 341 // an error happened
 342 bool handle_file(bufwriter* w, const char* path) {
 343     // a `-` filename stands for the standard input
 344     if (strcmp(path, "-") == 0) {
 345         return handle_reader(w, stdin, stdin_name);
 346     }
 347 
 348     FILE* f = fopen(path, "rb");
 349     if (f == NULL) {
 350         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 351         return false;
 352     }
 353 
 354     const bool ok = handle_reader(w, f, path);
 355     fclose(f);
 356     return ok;
 357 }
 358 
 359 // is_help_option simplifies control-flow for func run
 360 bool is_help_option(const char* s) {
 361     return s[0] == '-' && (
 362         strcmp(s, "-h") == 0 ||
 363         strcmp(s, "-help") == 0 ||
 364         strcmp(s, "--h") == 0 ||
 365         strcmp(s, "--help") == 0
 366     );
 367 }
 368 
 369 int main(int argc, char** argv) {
 370 #ifdef _WIN32
 371     setmode(fileno(stdin), O_BINARY);
 372     // ensure output lines end in LF instead of CRLF on windows
 373     setmode(fileno(stdout), O_BINARY);
 374     setmode(fileno(stderr), O_BINARY);
 375 #endif
 376 
 377     // emit first-step byte-decoding table for base64 symbols;
 378     // who needs scripts/interpreters when you have compilers?
 379 
 380     // for (unsigned int i = 0; i < 256; i++) {
 381     //     if (i % 8 == 0) {
 382     //         fprintf(stdout, "    ");
 383     //     }
 384     //     fprintf(stdout, "0x%02x,", rev_lookup_byte(i));
 385     //     fprintf(stdout, (i % 8 == 7 && i > 0) ? "\n" : " ");
 386     // }
 387     // return 0;
 388 
 389     if (argc > 1 && is_help_option(argv[1])) {
 390         printf("%s", info);
 391         return 0;
 392     }
 393 
 394     if (argc > 2) {
 395         fprintf(stderr, ERROR_LINE("multiple files not allowed"));
 396         return 1;
 397     }
 398 
 399     // enable full/block-buffering for standard output
 400     setvbuf(stdout, NULL, _IOFBF, 0);
 401 
 402     unsigned char outbuf[OBUF_SIZE];
 403     bufwriter bw;
 404     init_bufwriter(&bw, stdout, outbuf, sizeof(outbuf));
 405 
 406     const char* name = (argc < 2) ? "-" : argv[1];
 407     const int res = handle_file(&bw, name) ? 0 : 1;
 408     flush(&bw);
 409     return res;
 410 }