// File: debase64.c

/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -march=native -mtune=native -flto -o ./debase64 ./debase64.c
*/

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

// when RED_ERRORS is defined, error lines are wrapped in ANSI color codes;
// apple builds use the basic 8-color red instead of the 24-bit one
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE turns a string literal into a full (optionally styled) line
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// IBUF_SIZE is the size of the input buffer, in bytes
#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

// OBUF_SIZE is the size of the output buffer, in bytes
#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif

// info is the help message shown by the help options
static const char info[] = ""
"debase64 [options...] [filename...]\n"
"\n"
"Decode base64-encoded data: these include data-URIs, which start with a\n"
"MIME declaration before their base64 payload starts.\n"
"\n"
"Options\n"
"\n"
"    -h, -help, --h, --help    show this help message\n"
"";

// stdin_name is the name used for the standard input in error messages
static const char stdin_name[] = "<stdin>";

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // failed remembers whether any write/flush to the destination failed
    bool failed;
} bufwriter;

// init_bufwriter sets up a buffered-writer which sends its bytes to `out`,
// using the `cap` bytes starting at `b` as its buffer; cap must be positive
static void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
    w->buf = b;
    w->len = 0;
    w->cap = cap;
    w->out = out;
    w->failed = false;
}

// drain sends all buffered bytes to the destination and empties the buffer;
// failures are remembered in the `failed` field, after which nothing more
// is written
static void drain(bufwriter* w) {
    if (w->len > 0 && !w->failed) {
        if (fwrite(w->buf, 1, w->len, w->out) != w->len) {
            w->failed = true;
        }
    }
    w->len = 0;
}

// write_byte adds a byte to the buffer, emptying it first when it's full
static inline void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        drain(w);
    }
    w->buf[w->len++] = b;
}

// flush sends all pending bytes to the destination, and flushes it as well
static void flush(bufwriter* w) {
    drain(w);
    if (fflush(w->out) != 0) {
        w->failed = true;
    }
}

// match_lead checks if the n-byte buffer given starts with all the bytes of
// the null-terminated string given: a buffer shorter than that string never
// matches, which keeps callers from skipping past the end of their data
static bool match_lead(const unsigned char* buf, size_t n, const char* to) {
    for (; *to != 0; buf++, to++, n--) {
        // compare as unsigned bytes: plain chars can be signed, which would
        // make bytes above 0x7f (such as the ones in a BOM) never match
        if (n == 0 || *buf != (unsigned char)*to) {
            return false;
        }
    }
    return true;
}

// skip_data_uri finds how many leading bytes come before the payload of a
// base64 data-URI, which is right after the first `;base64,` in the buffer;
// the result is 0 when that marker isn't fully present in the buffer
static size_t skip_data_uri(const unsigned char* buf, size_t n) {
    static const char marker[] = ";base64,";
    const size_t marker_len = sizeof(marker) - 1;

    for (size_t i = 0; i + marker_len <= n; i++) {
        if (memcmp(buf + i, marker, marker_len) == 0) {
            return i + marker_len;
        }
    }
    return 0;
}

// INVALID signals an input byte isn't allowed in a base64 stream
#define INVALID 0xff

// base64_rev_lookup turns each input byte into its 6-bit value: `A` to `Z`
// stand for 0..25, `a` to `z` for 26..51, `0` to `9` for 52..61, `+` for 62,
// and `/` for 63; all other bytes are INVALID
static const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

// show_invalid_byte reports an input byte which can't be part of a base64
// stream, along with the line/position counters where it was found
static void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
    const char* msg = "invalid base64 data";
    fprintf(
        stderr, ERROR_LINE("%s (byte %d, line: %ld, pos: %ld)"),
        msg, (int)b, (long)line, (long)pos
    );
}

// handle_reader decodes all base64 data from the input given, which can start
// with a UTF-8 byte-order-mark and/or a data-URI prelude, and can span several
// lines; `path` is only used for error messages; returns false only when an
// error happened, after showing a message about it on stderr
static bool handle_reader(bufwriter* w, FILE* src, const char* path) {
    unsigned char buf[IBUF_SIZE];

    // line is the current line number, while pos is 1 more than the number
    // of base64 symbols accepted so far on that line
    size_t line = 1;
    size_t pos = 1;
    // payload counts base64 symbols, padding counts equal signs
    size_t payload = 0;
    size_t padding = 0;

    // quad has the 6-bit values of the current group of (up to) 4 symbols
    unsigned char quad[4] = {0, 0, 0, 0};

    // there's no point in reading more input after the output has failed
    while (!w->failed) {
        size_t n = fread(buf, sizeof(buf[0]), sizeof(buf), src);
        if (n < 1) {
            // either the input is over, or reading it failed: the latter
            // is checked right after this loop
            break;
        }

        const unsigned char* chunk = buf;

        // skip leading utf-8 byte-order-mark bytes, if present
        if (payload == 0 && match_lead(chunk, n, "\xef\xbb\xbf")) {
            chunk += 3;
            n -= 3;
        }

        // skip leading data-URI prelude, if present, even after a BOM
        if (payload == 0 && match_lead(chunk, n, "data:")) {
            const size_t skip = skip_data_uri(chunk, n);
            chunk += skip;
            n -= skip;
        }

        for (size_t i = 0; i < n; i++) {
            const unsigned char v = chunk[i];

            // base64 streams can span multiple lines
            if (v == '\n') {
                line++;
                pos = 1;
                continue;
            }

            // ignore carriage-returns to support CRLF-type lines
            if (v == '\r') {
                continue;
            }

            if (v == '=') {
                padding++;
                continue;
            }

            // only line-breaks and more padding can follow any padding
            if (padding > 0) {
                fputs(ERROR_LINE("payload after padding"), stderr);
                return false;
            }

            const unsigned char b = base64_rev_lookup[v];
            if (b == INVALID) {
                show_invalid_byte(v, line, pos);
                return false;
            }

            pos++;

            const size_t step = payload % 4;
            quad[step] = b;
            payload++;

            if (step == 3) {
                // 01234567 01234567 01234567 01234567
                // 00000000 11111111 22222222 33333333
                // xx000000 xx001111 xx111122 xx222222
                write_byte(w, (unsigned char)((quad[0] << 2) | (quad[1] >> 4)));
                write_byte(w, (unsigned char)((quad[1] << 4) | (quad[2] >> 2)));
                write_byte(w, (unsigned char)((quad[2] << 6) | (quad[3] >> 0)));
            }
        }

        // don't bother with rest of input when padding is clearly wrong
        if (padding > 2) {
            break;
        }
    }

    if (ferror(src)) {
        fprintf(stderr, ERROR_LINE("can't read from '%s'"), path);
        return false;
    }

    if (w->failed) {
        fputs(ERROR_LINE("can't write output"), stderr);
        return false;
    }

    if (padding > 2 || (padding > 0 && payload == 0)) {
        fputs(ERROR_LINE("excessive padding"), stderr);
        return false;
    }

    const size_t step = payload % 4;

    // a single base64 byte on its own is worth only 6 bits
    if (step == 1) {
        fputs(ERROR_LINE("missing final bytes"), stderr);
        return false;
    }

    // the padding must exactly complete the last group of 4 symbols
    const bool a = (step == 0 && padding != 0);
    const bool b = (step == 2 && padding != 2);
    const bool c = (step == 3 && padding != 1);
    if (a || b || c) {
        fputs(ERROR_LINE("bad padding"), stderr);
        return false;
    }

    // don't forget unemitted trailing bytes, if any
    switch (padding) {
    case 1:
        write_byte(w, (unsigned char)((quad[0] << 2) | (quad[1] >> 4)));
        write_byte(w, (unsigned char)((quad[1] << 4) | (quad[2] >> 2)));
        break;

    case 2:
        write_byte(w, (unsigned char)((quad[0] << 2) | (quad[1] >> 4)));
        break;

    default:
        break;
    }

    flush(w);
    if (w->failed) {
        fputs(ERROR_LINE("can't write output"), stderr);
        return false;
    }
    return true;
}

// handle_file handles data from the filename given; returns false only when
// an error happened
static bool handle_file(bufwriter* w, const char* path) {
    // a `-` filename stands for the standard input
    if (strcmp(path, "-") == 0) {
        return handle_reader(w, stdin, stdin_name);
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
        return false;
    }

    const bool ok = handle_reader(w, f, path);
    fclose(f);
    return ok;
}

// is_help_option simplifies control-flow for func main
static bool is_help_option(const char* s) {
    return s[0] == '-' && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// main decodes the file named by the only argument allowed, or the standard
// input when no name is given; the exit code is 0 on success, and 1 on error
int main(int argc, char** argv) {
#ifdef _WIN32
    _setmode(_fileno(stdin), _O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    _setmode(_fileno(stdout), _O_BINARY);
    _setmode(_fileno(stderr), _O_BINARY);
#endif

    if (argc > 1 && is_help_option(argv[1])) {
        fputs(info, stdout);
        return 0;
    }

    if (argc > 2) {
        fputs(ERROR_LINE("multiple files not allowed"), stderr);
        return 1;
    }

    // enable full/block-buffering for standard output: the size is given
    // explicitly, since a size of 0 isn't accepted by all C runtimes
    setvbuf(stdout, NULL, _IOFBF, BUFSIZ);

    unsigned char outbuf[OBUF_SIZE];
    bufwriter bw;
    init_bufwriter(&bw, stdout, outbuf, sizeof(outbuf));

    const char* name = (argc < 2) ? "-" : argv[1];
    const int res = handle_file(&bw, name) ? 0 : 1;
    flush(&bw);
    return res;
}