// File: debase64.c
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./debase64 ./debase64.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
// setmode and O_BINARY, used to switch the standard streams to binary mode,
// are declared by these 2 headers
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

// ERROR_STYLE and RESET_STYLE are the ANSI escape sequences put around error
// messages: both are empty unless RED_ERRORS is defined; apple builds get the
// basic-palette red instead of the 24-bit one; each macro is defined exactly
// once on every path, to avoid redefinition diagnostics
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE turns a string literal into a styled line-feed-terminated line
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// IBUF_SIZE is the size in bytes of the input buffer
#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

// OBUF_SIZE is the size in bytes of the output buffer
#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif

// info is the help message
const char* info = ""
"debase64 [options...] [filename...]\n"
"\n"
"\n"
"Decode base64-encoded data: these include data-URIs, which start with a\n"
"MIME declaration before their base64 payload starts.\n"
"\n"
"\n"
"Options\n"
"\n"
" -h, -help, --h, --help show this help message\n"
"";

// stdin_name is the name used for the standard input
const char* stdin_name = "<stdin>";

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;
} bufwriter;

// init_bufwriter sets up an empty buffered-writer, which uses the `cap` bytes
// starting at `b` as its buffer, and which sends its output to `out`
void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
    w->buf = b;
    w->len = 0;
    w->cap = cap;
    w->out = out;
}

// write_byte adds the byte given to the buffer: when the buffer is already
// full, its contents are written out first, to make room
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len < w->cap) {
        w->buf[w->len++] = b;
        return;
    }

    if (w->cap == 0) {
        // a zero-capacity buffer can't hold anything, so storing into it
        // would write out of bounds: send the byte straight to the output
        fputc(b, w->out);
        return;
    }

    fwrite(w->buf, 1, w->len, w->out);
    w->buf[0] = b;
    w->len = 1;
}

// flush writes out any bytes still buffered, and flushes the output stream
void flush(bufwriter* w) {
    if (w->len > 0) {
        fwrite(w->buf, 1, w->len, w->out);
    }
    w->len = 0;
    fflush(w->out);
}

// match_lead checks if the first `n` bytes at `buf` start with the whole
// null-terminated string `to`; a buffer which ends before the string does
// isn't a match, and an empty string always matches
bool match_lead(const unsigned char* buf, size_t n, const char* to) {
    for (; *to != 0; buf++, to++, n--) {
        // compare as unsigned bytes, since bytes >= 0x80 in `to` are negative
        // where plain chars are signed, and would never match otherwise
        if (n == 0 || *buf != (unsigned char)*to) {
            return false;
        }
    }
    return true;
}

// skip_data_uri finds the first `;base64,` in the `n` bytes at `buf`, and
// returns the index right after it, which is never more than `n`; the result
// is 0 when there's no such marker
size_t skip_data_uri(const unsigned char* buf, size_t n) {
    for (size_t i = 0; i < n; i++) {
        if (match_lead(buf + i, n - i, ";base64,")) {
            return i + (sizeof(";base64,") - 1);
        }
    }
    return 0;
}

// INVALID signals an input byte isn't allowed in a base64 stream
#define INVALID 0xff

// base64_rev_lookup maps each input byte to the 6-bit value it stands for,
// or to INVALID when the byte isn't one of the 64 base64 symbols
const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

// kept for reference: the branch-based equivalent of the lookup table above
//
// unsigned char rev_lookup_byte(unsigned char b) {
//     if ('A' <= b && b <= 'Z') {
//         return b - 'A';
//     }
//     if ('a' <= b && b <= 'z') {
//         return (b - 'a') + 26;
//     }
//     if ('0' <= b && b <= '9') {
//         return (b - '0') + 52;
//     }
//
//     switch (b) {
//         case '+':
//             return 62;
//         case '/':
//             return 63;
//         default:
//             return INVALID;
//     }
// }

unsigned char rev_lookup_byte(unsigned char b) { 196 return base64_rev_lookup[b]; 197 } 198 199 void show_invalid_byte(unsigned char b, size_t line, size_t pos) { 200 const char* msg = "invalid base64 data"; 201 const char* fmt = ERROR_LINE("%s (byte %d, line: %ld, pos: %ld)"); 202 fprintf(stderr, fmt, msg, b, (long)line, (long)pos); 203 } 204 205 bool handle_reader(bufwriter* w, FILE* src, const char* path) { 206 unsigned char buf[IBUF_SIZE]; 207 208 uint64_t line = 1; 209 uint64_t pos = 1; 210 uint64_t payload = 0; 211 uint64_t padding = 0; 212 213 unsigned char quad[4]; 214 quad[0] = 0; 215 quad[1] = 0; 216 quad[2] = 0; 217 quad[3] = 0; 218 219 while (!feof(w->out)) { 220 size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src); 221 if (n < 1) { 222 // assume input is over when no bytes were read 223 break; 224 } 225 226 unsigned char* chunk = buf; 227 228 // skip leading utf-8 byte-order-mark bytes, if present 229 if (payload == 0 && n >= 3 && match_lead(buf, n, "\xef\xbb\xbf")) { 230 chunk += 3; 231 n -= 3; 232 } 233 234 // skip leading data-URI prelude, if present 235 if (payload == 0 && match_lead(buf, n, "data:")) { 236 const int skip = skip_data_uri(buf, n); 237 chunk += skip; 238 n -= skip; 239 } 240 241 for (size_t i = 0; i < n; i++) { 242 const unsigned char v = chunk[i]; 243 244 // ignore carriage-returns to support CRLF lines 245 if (v == '\r') { 246 continue; 247 } 248 249 // base64 streams can span multiple lines 250 if (v == '\n') { 251 line++; 252 pos = 1; 253 continue; 254 } 255 256 pos++; 257 258 if (v == '=') { 259 padding++; 260 continue; 261 } 262 263 if (padding > 0 && v != '=') { 264 write_byte(w, '\n'); 265 const char* msg = "equal signs are only valid at the end"; 266 const char* fmt = ERROR_LINE("%s (line %ld, pos %ld)"); 267 fprintf(stderr, fmt, msg, (long)line, (long)pos); 268 return false; 269 } 270 271 unsigned char b = rev_lookup_byte(v); 272 273 if (b == INVALID) { 274 show_invalid_byte(v, line, pos); 275 return false; 276 } 277 
278 const size_t step = payload % 4; 279 quad[step] = b; 280 payload++; 281 282 if (step == 3) { 283 // 01234567 01234567 01234567 01234567 284 // 00000000 11111111 22222222 33333333 285 // xx000000 xx001111 xx111122 xx222222 286 write_byte(w, (quad[0] << 2) | (quad[1] >> 4)); 287 write_byte(w, (quad[1] << 4) | (quad[2] >> 2)); 288 write_byte(w, (quad[2] << 6) | (quad[3] >> 0)); 289 } 290 } 291 } 292 293 // try to be resilient to missing trailing/padding equals 294 // if (padding == 0 && payload > 0) { 295 // padding = 4 - (payload % 4); 296 // } 297 298 // don't forget unemitted trailing bytes, if any 299 switch (padding) { 300 case 1: 301 write_byte(w, (quad[0] << 2) | (quad[1] >> 4)); 302 write_byte(w, (quad[1] << 4) | (quad[2] >> 2)); 303 break; 304 case 2: 305 write_byte(w, (quad[0] << 2) | (quad[1] >> 4)); 306 break; 307 } 308 309 flush(w); 310 return true; 311 } 312 313 // handle_file handles data from the filename given; returns false only when 314 // an error happened 315 bool handle_file(bufwriter* w, const char* path) { 316 // a `-` filename stands for the standard input 317 if (path[0] == '-' && path[1] == 0) { 318 return handle_reader(w, stdin, stdin_name); 319 } 320 321 FILE* f = fopen(path, "rb"); 322 if (f == NULL) { 323 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 324 return false; 325 } 326 327 const bool ok = handle_reader(w, f, path); 328 fclose(f); 329 return ok; 330 } 331 332 // is_help_option simplifies control-flow for func run 333 bool is_help_option(const char* s) { 334 return s[0] == '-' && ( 335 strcmp(s, "-h") == 0 || 336 strcmp(s, "-help") == 0 || 337 strcmp(s, "--h") == 0 || 338 strcmp(s, "--help") == 0 339 ); 340 } 341 342 int main(int argc, char** argv) { 343 #ifdef _WIN32 344 setmode(fileno(stdin), O_BINARY); 345 // ensure output lines end in LF instead of CRLF on windows 346 setmode(fileno(stdout), O_BINARY); 347 setmode(fileno(stderr), O_BINARY); 348 #endif 349 350 // emit first-step byte-decoding table for 
base64 symbols; 351 // who needs scripts/interpreters when you have compilers? 352 353 // for (unsigned int i = 0; i < 256; i++) { 354 // if (i % 8 == 0) { 355 // fprintf(stdout, " "); 356 // } 357 // fprintf(stdout, "0x%02x,", rev_lookup_byte(i)); 358 // fprintf(stdout, (i % 8 == 7 && i > 0) ? "\n" : " "); 359 // } 360 // return 0; 361 362 if (argc > 1 && is_help_option(argv[1])) { 363 printf("%s", info); 364 return 0; 365 } 366 367 if (argc > 2) { 368 fprintf(stderr, ERROR_LINE("multiple files not allowed")); 369 return 1; 370 } 371 372 // enable full/block-buffering for standard output 373 // setvbuf(stdout, NULL, _IOFBF, 0); 374 375 unsigned char outbuf[OBUF_SIZE]; 376 bufwriter bw; 377 init_bufwriter(&bw, stdout, outbuf, sizeof(outbuf)); 378 379 const char* name = (argc < 2) ? "-" : argv[1]; 380 const int res = handle_file(&bw, name) ? 0 : 1; 381 flush(&bw); 382 return res; 383 }