File: debase64.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./debase64 ./debase64.c
  29 */
  30 
  31 #include <stdbool.h>
  32 #include <stdint.h>
  33 #include <stdio.h>
  34 #include <string.h>
  35 
  36 #ifdef _WIN32
  37 #include <windows.h>
  38 #endif
  39 
  40 const char* info =
  41     ""
  42     "debase64 [options...] [filename...]\n"
  43     "\n"
  44     "\n"
  45     "Decode base64-encoded data: these include data-URIs, which start with a\n"
  46     "MIME declaration before their base64 payload starts.\n"
  47     "\n"
  48     "\n"
  49     "Options\n"
  50     "\n"
  51     "    -h, -help, --h, --help              show this help message\n"
  52     "";
  53 
  54 const char* stdin_name = "<stdin>";
  55 
  56 bool match_lead(unsigned char* buf, size_t n, char* to) {
  57     for (; n > 0 && *to != 0; buf++, to++, n--) {
  58         if (*buf != *to) {
  59             return false;
  60         }
  61     }
  62     return true;
  63 }
  64 
  65 size_t skip_data_uri(unsigned char* buf, size_t n) {
  66     for (size_t i = 0; i < n; i++) {
  67         if (match_lead(buf + i, n - i, ";base64,")) {
  68             return i + (sizeof(";base64,") - 1);
  69         }
  70     }
  71     return 0;
  72 }
  73 
// INVALID signals an input byte isn't allowed in a base64 stream
#define INVALID 0xff

// base64_rev_lookup maps each possible input byte to its 6-bit base64 value,
// or to INVALID for any byte outside the alphabet `A-Z`, `a-z`, `0-9`, `+`,
// and `/`; each row below covers 8 consecutive byte values, starting from 0;
// note the equals sign has no entry of its own here and is marked INVALID
const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, // `+` is 62, `/` is 63
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, // `0` to `7`
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // `8` and `9`
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, // `A` to `G`
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, // `H` to `O`
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, // `P` to `W`
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, // `X` to `Z`
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, // `a` to `g`
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, // `h` to `o`
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, // `p` to `w`
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, // `x` to `z`
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
 111 
 112 // unsigned char rev_lookup_byte(unsigned char b) {
 113 //     if ('A' <= b && b <= 'Z') {
 114 //         return b - 'A';
 115 //     }
 116 //     if ('a' <= b && b <= 'z') {
 117 //         return (b - 'a') + 26;
 118 //     }
 119 //     if ('0' <= b && b <= '9') {
 120 //         return (b - '0') + 52;
 121 //     }
 122 //
 123 //     switch (b) {
 124 //         case '+':
 125 //             return 62;
 126 //         case '/':
 127 //             return 63;
 128 //         default:
 129 //             return INVALID;
 130 //     }
 131 // }
 132 
 133 unsigned char rev_lookup_byte(unsigned char b) {
 134     return base64_rev_lookup[b];
 135 }
 136 
 137 void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
 138     const char* msg = "invalid base64 data";
 139     const char* fmt = "\x1b[31m%s (byte %d, line: %ld, pos: %ld)\x1b[0m\n";
 140     fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
 141 }
 142 
 143 bool handle_reader(FILE* w, FILE* src, const char* path) {
 144     unsigned char buf[32 * 1024];
 145 
 146     uint64_t line = 1;
 147     uint64_t pos = 1;
 148     uint64_t payload = 0;
 149     uint64_t padding = 0;
 150 
 151     unsigned char quad[4];
 152     quad[0] = 0;
 153     quad[1] = 0;
 154     quad[2] = 0;
 155     quad[3] = 0;
 156 
 157     while (!feof(w)) {
 158         size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 159         if (n < 1) {
 160             // assume input is over when no bytes were read
 161             break;
 162         }
 163 
 164         unsigned char* chunk = buf;
 165 
 166         // skip leading utf-8 byte-order-mark bytes, if present
 167         if (payload == 0 && n >= 3 && match_lead(buf, n, "\xef\xbb\xbf")) {
 168             chunk += 3;
 169             n -= 3;
 170         }
 171 
 172         // skip leading data-URI prelude, if present
 173         if (payload == 0 && match_lead(buf, n, "data:")) {
 174             const int skip = skip_data_uri(buf, n);
 175             chunk += skip;
 176             n -= skip;
 177         }
 178 
 179         for (size_t i = 0; i < n; i++) {
 180             const unsigned char v = chunk[i];
 181 
 182             // ignore carriage-returns to support CRLF lines
 183             if (v == '\r') {
 184                 continue;
 185             }
 186 
 187             // base64 streams can span multiple lines
 188             if (v == '\n') {
 189                 line++;
 190                 pos = 1;
 191                 continue;
 192             }
 193 
 194             pos++;
 195 
 196             if (v == '=') {
 197                 padding++;
 198                 continue;
 199             }
 200 
 201             if (padding > 0 && v != '=') {
 202                 putc('\n', w);
 203                 const char* msg = "equal signs are only valid at the end";
 204                 const char* fmt = "\x1b[31m%s (line %ld, pos %ld)\x1b[0m\n";
 205                 fprintf(stderr, fmt, msg, (long)line, (long)pos);
 206                 return false;
 207             }
 208 
 209             unsigned char b = rev_lookup_byte(v);
 210 
 211             if (b == INVALID) {
 212                 show_invalid_byte(v, line, pos);
 213                 return false;
 214             }
 215 
 216             const size_t step = payload % 4;
 217             quad[step] = b;
 218             payload++;
 219 
 220             if (step == 3) {
 221                 // 01234567 01234567 01234567 01234567
 222                 // 00000000 11111111 22222222 33333333
 223                 // xx000000 xx001111 xx111122 xx222222
 224                 putc((quad[0] << 2) | (quad[1] >> 4), w);
 225                 putc((quad[1] << 4) | (quad[2] >> 2), w);
 226                 putc((quad[2] << 6) | (quad[3] >> 0), w);
 227             }
 228         }
 229     }
 230 
 231     // try to be resilient to missing trailing/padding equals
 232     // if (padding == 0 && payload > 0) {
 233     //     padding = 4 - (payload % 4);
 234     // }
 235 
 236     // don't forget unemitted trailing bytes, if any
 237     switch (padding) {
 238         case 1:
 239             putc((quad[0] << 2) | (quad[1] >> 4), w);
 240             putc((quad[1] << 4) | (quad[2] >> 2), w);
 241             break;
 242         case 2:
 243             putc((quad[0] << 2) | (quad[1] >> 4), w);
 244             break;
 245     }
 246 
 247     fflush(w);
 248     return true;
 249 }
 250 
 251 // handle_file handles data from the filename given; returns false only when
 252 // an error happened
 253 bool handle_file(FILE* w, const char* path) {
 254     // a `-` filename stands for the standard input
 255     if (path[0] == '-' && path[1] == 0) {
 256         return handle_reader(w, stdin, stdin_name);
 257     }
 258 
 259     FILE* f = fopen(path, "rb");
 260     if (f == NULL) {
 261         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 262         return false;
 263     }
 264 
 265     const bool ok = handle_reader(w, f, path);
 266     fclose(f);
 267     return ok;
 268 }
 269 
 270 // is_help_option simplifies control-flow for func run
 271 bool is_help_option(const char* s) {
 272     return s[0] == '-' && (
 273         strcmp(s, "-h") == 0 ||
 274         strcmp(s, "-help") == 0 ||
 275         strcmp(s, "--h") == 0 ||
 276         strcmp(s, "--help") == 0
 277     );
 278 }
 279 
 280 int main(int argc, char** argv) {
 281 #ifdef _WIN32
 282     setmode(fileno(stdin), O_BINARY);
 283     // ensure output lines end in LF instead of CRLF on windows
 284     setmode(fileno(stdout), O_BINARY);
 285     setmode(fileno(stderr), O_BINARY);
 286 #endif
 287 
 288     // emit first-step byte-decoding table for base64 symbols;
 289     // who needs scripts/interpreters when you have compilers?
 290 
 291     // for (unsigned int i = 0; i < 256; i++) {
 292     //     if (i % 8 == 0) {
 293     //         fprintf(stdout, "    ");
 294     //     }
 295     //     fprintf(stdout, "0x%02x,", rev_lookup_byte(i));
 296     //     fprintf(stdout, (i % 8 == 7 && i > 0) ? "\n" : " ");
 297     // }
 298     // return 0;
 299 
 300     // handle special cmd-line options
 301     for (size_t i = 1; i < argc; i++) {
 302         if (is_help_option(argv[i])) {
 303             puts(info);
 304             return 0;
 305         }
 306     }
 307 
 308     if (argc > 2) {
 309         fprintf(stderr, "\x1b[31mmultiple files not allowed\x1b[0m\n");
 310         return 1;
 311     }
 312 
 313     const char* name = (argc < 2) ? "-" : argv[1];
 314     return handle_file(stdout, name) ? 0 : 1;
 315 }