// File: debase64.c
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./debase64 ./debase64.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
// setmode and O_BINARY, used in func main, are declared by these two
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

const char* info =
    ""
    "debase64 [options...] [filename...]\n"
    "\n"
    "\n"
    "Decode base64-encoded data: these include data-URIs, which start with a\n"
    "MIME declaration before their base64 payload starts.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, -help, --h, --help show this help message\n"
    "";

const char* stdin_name = "<stdin>";

// match_lead checks if the first n bytes of the buffer given start with the
// whole null-terminated string given: buffers shorter than that string never
// match, so a true result guarantees all its bytes are present
bool match_lead(const unsigned char* buf, size_t n, const char* to) {
    for (; *to != 0; buf++, to++, n--) {
        // compare as unsigned bytes: plain chars may be signed, which would
        // make bytes above 0x7f (like the ones in a UTF-8 BOM) never match
        if (n == 0 || *buf != (unsigned char)*to) {
            return false;
        }
    }
    return true;
}

// skip_data_uri returns how many leading bytes to skip to get past the first
// `;base64,` marker in the buffer given, or 0 when no complete marker is
// found; the result is never more than n
size_t skip_data_uri(const unsigned char* buf, size_t n) {
    static const char marker[] = ";base64,";
    const size_t marker_len = sizeof(marker) - 1;

    for (size_t i = 0; i + marker_len <= n; i++) {
        if (match_lead(buf + i, n - i, marker)) {
            return i + marker_len;
        }
    }
    return 0;
}

// INVALID signals an input byte isn't allowed in a base64 stream
#define INVALID 0xff

// base64_rev_lookup turns each input byte into its 6-bit value: `A` to `Z`
// give 0 to 25, `a` to `z` give 26 to 51, `0` to `9` give 52 to 61, `+` gives
// 62, and `/` gives 63; all other bytes give INVALID
const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

// rev_lookup_byte gives the 6-bit value for the base64 symbol given, or
// INVALID when the byte isn't a base64 symbol
unsigned char rev_lookup_byte(unsigned char b) {
    return base64_rev_lookup[b];
}

// show_invalid_byte reports a byte which isn't valid base64 on stderr, along
// with the line/position given for it
void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
    const char* msg = "invalid base64 data";
    const char* fmt = "\x1b[31m%s (byte %d, line: %ld, pos: %ld)\x1b[0m\n";
    fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
}

// handle_reader decodes all base64 data from src into w, skipping a leading
// UTF-8 byte-order mark and/or a leading data-URI prelude, when present;
// carriage-returns and line-feeds are ignored; path is only used to name the
// input in error messages; returns false only when an error happened, after
// showing a message about it on stderr
bool handle_reader(FILE* w, FILE* src, const char* path) {
    unsigned char buf[32 * 1024];

    uint64_t line = 1;
    uint64_t pos = 1;
    uint64_t payload = 0;
    uint64_t padding = 0;

    unsigned char quad[4] = {0, 0, 0, 0};

    for (;;) {
        size_t n = fread(buf, sizeof(buf[0]), sizeof(buf), src);
        if (n < 1) {
            // input is either over, or can't be read any further: which of
            // these happened is checked right after this loop
            break;
        }

        unsigned char* chunk = buf;

        // skip leading utf-8 byte-order-mark bytes, if present
        if (payload == 0 && match_lead(chunk, n, "\xef\xbb\xbf")) {
            chunk += 3;
            n -= 3;
        }

        // skip leading data-URI prelude, if present: checking from chunk,
        // instead of buf, also finds preludes which come after a BOM
        if (payload == 0 && match_lead(chunk, n, "data:")) {
            const size_t skip = skip_data_uri(chunk, n);
            chunk += skip;
            n -= skip;
        }

        for (size_t i = 0; i < n; i++) {
            const unsigned char v = chunk[i];

            // ignore carriage-returns to support CRLF lines
            if (v == '\r') {
                continue;
            }

            // base64 streams can span multiple lines
            if (v == '\n') {
                line++;
                pos = 1;
                continue;
            }

            // remember the position of the current byte, so error messages
            // point at it, instead of at the byte after it
            const uint64_t at = pos;
            pos++;

            if (v == '=') {
                padding++;
                continue;
            }

            if (padding > 0) {
                putc('\n', w);
                const char* msg = "equal signs are only valid at the end";
                const char* fmt = "\x1b[31m%s (line %ld, pos %ld)\x1b[0m\n";
                fprintf(stderr, fmt, msg, (long)line, (long)at);
                return false;
            }

            const unsigned char b = rev_lookup_byte(v);

            if (b == INVALID) {
                show_invalid_byte(v, line, at);
                return false;
            }

            const size_t step = payload % 4;
            quad[step] = b;
            payload++;

            if (step == 3) {
                // 01234567 01234567 01234567 01234567
                // 00000000 11111111 22222222 33333333
                // xx000000 xx001111 xx111122 xx222222
                putc(((quad[0] << 2) | (quad[1] >> 4)) & 0xff, w);
                putc(((quad[1] << 4) | (quad[2] >> 2)) & 0xff, w);
                putc(((quad[2] << 6) | (quad[3] >> 0)) & 0xff, w);
            }
        }
    }

    if (ferror(src)) {
        fprintf(stderr, "\x1b[31mcan't read from %s\x1b[0m\n", path);
        return false;
    }

    // don't forget unemitted trailing bytes, if any: how many symbols are
    // left over decides how many bytes to emit, instead of how many equals
    // were found, which avoids re-emitting bytes from an already-emitted
    // quad, and which also handles missing trailing/padding equals
    switch (payload % 4) {
    case 2:
        putc(((quad[0] << 2) | (quad[1] >> 4)) & 0xff, w);
        break;
    case 3:
        putc(((quad[0] << 2) | (quad[1] >> 4)) & 0xff, w);
        putc(((quad[1] << 4) | (quad[2] >> 2)) & 0xff, w);
        break;
    default:
        // either nothing is left over, or there's a single symbol, whose
        // 6 bits aren't enough to make a whole byte
        break;
    }

    if (fflush(w) != 0 || ferror(w)) {
        fprintf(stderr, "\x1b[31mcan't write decoded output\x1b[0m\n");
        return false;
    }
    return true;
}

// handle_file handles data from the filename given; returns false only when
// an error happened
bool handle_file(FILE* w, const char* path) {
    // a `-` filename stands for the standard input
    if (path[0] == '-' && path[1] == 0) {
        return handle_reader(w, stdin, stdin_name);
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_reader(w, f, path);
    fclose(f);
    return ok;
}

// is_help_option simplifies control-flow for func main
bool is_help_option(const char* s) {
    return s[0] == '-' && (
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0
    );
}

// main decodes either the single file named on the command line, or the
// standard input when no name is given; exits with 0 on success, 1 otherwise
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // handle special cmd-line options
    for (int i = 1; i < argc; i++) {
        if (is_help_option(argv[i])) {
            puts(info);
            return 0;
        }
    }

    if (argc > 2) {
        fprintf(stderr, "\x1b[31mmultiple files not allowed\x1b[0m\n");
        return 1;
    }

    const char* name = (argc < 2) ? "-" : argv[1];
    return handle_file(stdout, name) ? 0 : 1;
}