File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./datauri ./datauri.c
  29 */
  30 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif
  39 
  40 #ifdef RED_ERRORS
  41 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  42 #ifdef __APPLE__
  43 #define ERROR_STYLE "\x1b[31m"
  44 #endif
  45 #define RESET_STYLE "\x1b[0m"
  46 #else
  47 #define ERROR_STYLE
  48 #define RESET_STYLE
  49 #endif
  50 
  51 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  52 
// IBUF_SIZE is the size in bytes of the buffer func handle_reader uses to
// read chunks of input; it can be overridden when building
#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

// OBUF_SIZE is the size in bytes of the output buffer func run hands to its
// buffered-writer; it can be overridden when building
#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif
  60 
// info is the help message: func main prints it to stdout, and quits right
// away, when the first command-line argument is one of the help options
const char* info = ""
"datauri [options...] [filenames...]\n"
"\n"
"\n"
"Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
"few bytes from each data/file stream. When given multiple inputs, the output\n"
"will be multiple lines, one for each file given.\n"
"\n"
"Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
"standard-input, which is also used automatically when not given any files.\n"
"\n"
"Data-URIs are base64-encoded text representations of arbitrary data, which\n"
"include their payload's MIME-type, and which are directly useable/shareable\n"
"in web-browsers as links, despite not looking like normal links/URIs.\n"
"\n"
"Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
"\n"
"\n"
"Options, also available with leading double-dashes\n"
"\n"
"    -h           show this help message\n"
"    -help        show this help message\n"
"\n"
"    -f           change the fallback MIME type using the next argument\n"
"    -fallback    change the fallback MIME type using the next argument\n"
"\n"
"    -m           change the fallback MIME type using the next argument\n"
"    -mime        change the fallback MIME type using the next argument\n"
"\n"
"    -t           change the fallback MIME type using the next argument\n"
"    -type        change the fallback MIME type using the next argument\n"
"";
  93 
// stdin_name is the name given to the standard input, for use in error
// messages
const char* stdin_name = "<stdin>";

// default_mime_fallback is the MIME type for unrecognized data, unless a
// different fallback is chosen using the command-line options
#define default_mime_fallback "application/octet-stream"

// fallback_mime_type is what func guess_mime returns when no header pattern
// matches the data; func run changes it when given a fallback option
const char* fallback_mime_type = default_mime_fallback;
  99 
// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; bytes pile up in
// a buffer provided by the caller, and are only sent to the destination
// when that buffer fills up, or when func flush is called
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;
} bufwriter;
 115 
 116 void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
 117     w->buf = b;
 118     w->len = 0;
 119     w->cap = cap;
 120     w->out = out;
 121 }
 122 
 123 static inline void write_byte(bufwriter* w, unsigned char b) {
 124     if (w->len < w->cap) {
 125         w->buf[w->len++] = b;
 126         return;
 127     }
 128 
 129     fwrite(w->buf, 1, w->cap, w->out);
 130     w->buf[0] = b;
 131     w->len = 1;
 132 }
 133 
 134 void write_string(bufwriter* w, const char* s) {
 135     for (; *s != 0; s++) {
 136         write_byte(w, *s);
 137     }
 138 }
 139 
 140 void flush(bufwriter* w) {
 141     if (w->len > 0) {
 142         fwrite(w->buf, 1, w->len, w->out);
 143     }
 144     w->len = 0;
 145     fflush(w->out);
 146 }
 147 
 148 // can be anything: ensure this value differs from all other literal bytes
 149 // in the generic-headers table: failing that, its value could cause subtle
 150 // type-misdetection bugs; the value is chosen to be `obviously` findable
 151 // in the source, which also implies a constant beyond the ascii range, as
 152 // ascii char-constants are also used in the tables
 153 const unsigned char cba = 0xfd; // 253
 154 
// short names for the MIME types used in the header-detection tables and
// in the table of fallback aliases; being string-literal macros, the same
// type can go by more than one name
#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define midi "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define octet "application/octet-stream"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define utf8 "text/plain; charset=UTF-8"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"
 206 
// format_descriptor ties a file-header pattern to its data-format type;
// the 15-byte header-limit nicely aligns with the 1-byte length before it
typedef struct format_descriptor {
    // header_length is how many of the header bytes make up the pattern; the
    // tables of descriptors all end with an entry where this is 0
    unsigned char header_length;
    // header_bytes has the pattern itself, where the `can be anything` value
    // stands for any byte
    unsigned char header_bytes[15];
    // mime is the MIME type for data matching the pattern
    const char* mime;
} format_descriptor;
 214 
 215 // starts_as tries to match header data to the pattern given: this includes
 216 // allowing `any byte` when the pattern indicates so, using a value reserved
 217 // for that purpose
 218 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
 219     // when header data aren't enough for a pattern, there's no match
 220     if (xlen < ylen) {
 221         return false;
 222     }
 223 
 224     for (size_t i = 0; i < ylen; i++) {
 225         if (y[i] == cba) {
 226             // `can be anything` value always matches
 227             continue;
 228         }
 229 
 230         if (x[i] != y[i]) {
 231             return false;
 232         }
 233     }
 234 
 235     return true;
 236 }
 237 
// start_format_descriptor expands the pattern bytes given into the first 2
// fields of a format_descriptor initializer: the byte-count, followed by the
// bytes themselves in braces
//
// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }
 242 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
//
// all patterns here skip the first 4 bytes, then expect `ftyp` followed by
// a 4-byte brand; the entry with a pattern-length of 0 ends the table
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    // {
    //     24,
    //     {
    //         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
    //         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
    //     },
    //     m4a,
    // },
    {0},
};
 266 
// the tables which follow group header patterns by their first byte, which
// is the number at the end of each table's name; each table ends with an
// entry whose pattern-length is 0

// first byte 0: MPEG videos, icons, cursors, and wasm modules
format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

// first byte 26 (0x1A): webm videos
format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

// first byte 31 (0x1F): gzip-compressed data
format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

// first byte 35 (#): text starting with `#! ` or `#!/` is plain-text
format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

// first byte 37 (%): PDF and postscript documents
format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

// first byte 40 (0x28): zstd-compressed data
format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

// first byte 46 (.): au sounds
format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

// first byte 56 (8): photoshop pictures
format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

// first byte 60 (<): HTML, SVG, and XML; the 15-character patterns fill all
// the header bytes available, leaving no room for the null byte ending
// their string literals, which C allows when initializing arrays of chars
format_descriptor hdr_dispatch_60[] = {
    {15, "<!DOCTYPE html>", html},
    {15, "<!DOCTYPE html ", html},
    {5, "<svg>", svg},
    {5, "<svg ", svg},
    {6, "<html>", html},
    {6, "<html ", html},
    {6, "<head>", html},
    {6, "<head ", html},
    {6, "<body>", html},
    {6, "<body ", html},
    {6, "<?xml>", xml},
    {6, "<?xml ", xml},
    {0},
};
 329 
// first byte 65 (A): djvu documents, whose pattern skips 4 bytes after the
// first 8, using all 15 header bytes available
format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

// first byte 66 (B): bitmap pictures, whose pattern only checks the first 2
// bytes and the 15th one
format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

// first byte 70 (F): AIFF sounds, whose patterns skip 4 bytes after `FORM`
format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

// first byte 71 (G): GIF pictures
format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

// first byte 73 (I): MP3 sounds starting with metadata, and TIFF pictures
format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

// first byte 77 (M): TIFF pictures, MIDI sounds, and windows executables
format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", midi},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

// first byte 79 (O): ogg sounds
format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

// first byte 80 (P): zip archives
format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

// first byte 82 (R): formats starting with `RIFF`, whose patterns skip the
// 4 bytes right after that
format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

// format_descriptor hdr_dispatch_83[] = {
//     // {16, "SQLite format 3\x00", sqlite3},
//     {0},
// };

// first byte 99 (c): CAF sounds
format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

// first byte 102 (f): FLAC sounds
format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};
 415 
 416 format_descriptor hdr_dispatch_123[] = {
 417     {4, "{\\rtf", rtf},
 418     {0},
 419 };
 420 
// first byte 127 (0x7F): ELF executables
format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

// first byte 137 (0x89): PNG pictures
format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

// first byte 255 (0xFF): JPEG pictures, and MP3 sounds; entries are tried
// in the order given
format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpeg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
 437 
 438 // hdr_dispatch groups format-description-groups by their first byte, thus
 439 // shortening total lookups for some data header
 440 //
 441 // notice how the `ftyp` data formats aren't handled here, since these can
 442 // start with any byte, instead of the literal value of the any-byte markers
 443 // they use
 444 //
 445 // all entries are arrays which must always end with a special entry whose
 446 // pattern-length is declared to be 0, since there's no explicit way to know
 447 // the length of these arrays when looping on them
 448 //
 449 // all non-null entries are setup explicitly, later in the code
 450 format_descriptor* hdr_dispatch[256] = {
 451     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 452     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 453     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 454     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 455     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 456     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 457     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 458     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 459     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 460     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 461     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 462     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 463     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 464     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 465     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 466     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 467     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 468     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 469     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 470     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 471     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 472     NULL, NULL, NULL, NULL,
 473 };
 474 
 475 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 476 // using the lookup-tables
 477 const char* guess_mime(const unsigned char* buf, size_t len) {
 478     if (len == 0) {
 479         return NULL;
 480     }
 481 
 482     // just in case, start with the patterns which allow any first byte
 483     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 484         const unsigned char* hb = special_headers[i].header_bytes;
 485         const size_t hl = special_headers[i].header_length;
 486         if (starts_as(buf, len, hb, hl)) {
 487             return special_headers[i].mime;
 488         }
 489     }
 490 
 491     // the m4a-dash header exceeds the 15-byte limit of the lookup tables
 492     const uint8_t header1[24] = {
 493         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
 494         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
 495     };
 496     if (starts_as(buf, len, header1, sizeof(header1))) {
 497         return m4a;
 498     }
 499 
 500     // the sqlite3 header exceeds the 15-byte limit of the lookup tables
 501     const uint8_t header2[16] = "SQLite format 3\x00";
 502     if (starts_as(buf, len, header2, sizeof(header2))) {
 503         return sqlite3;
 504     }
 505 
 506     format_descriptor* guesses = hdr_dispatch[buf[0]];
 507     if (guesses == NULL) {
 508         return fallback_mime_type;
 509     }
 510 
 511     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 512         const unsigned char* hb = guesses[i].header_bytes;
 513         const size_t hl = guesses[i].header_length;
 514         if (starts_as(buf, len, hb, hl)) {
 515             return guesses[i].mime;
 516         }
 517     }
 518 
 519     return fallback_mime_type;
 520 }
 521 
 522 bool is_mime_disabled(const char* mime) {
 523     return (mime != NULL) && (mime[0] == 'n') && (
 524         strcmp(mime, "no") == 0 ||
 525         strcmp(mime, "nomime") == 0 ||
 526         strcmp(mime, "no-mime") == 0 ||
 527         strcmp(mime, "none") == 0 ||
 528         strcmp(mime, "not") == 0
 529     );
 530 }
 531 
 532 // start_data_uri starts the output by declaring the data-URI to be an
 533 // auto-detected MIME-type; the return value is the auto-detection success
 534 bool start_data_uri(bufwriter* w, const unsigned char* buf, size_t len) {
 535     const char* mime = guess_mime(buf, len);
 536     if (is_mime_disabled(mime)) {
 537         return true;
 538     }
 539     if (mime == NULL || mime[0] == 0) {
 540         return false;
 541     }
 542 
 543     write_string(w, "data:");
 544     for (size_t i = 0; mime[i] != 0; i++) {
 545         write_byte(w, mime[i]);
 546     }
 547     write_string(w, ";base64,");
 548     return true;
 549 }
 550 
 551 const unsigned char base64_lookup[64] =
 552     ""
 553     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 554     "";
 555 
 556 static inline uint32_t combine_triple(const unsigned char data[3]) {
 557     return (data[0] << 16) | (data[1] << 8) | (data[2] << 0);
 558 }
 559 
 560 static inline void emit_triple(bufwriter* w, uint32_t v) {
 561     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 562     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 563     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 564     write_byte(w, base64_lookup[0x3f & (v >> 0)]);
 565 }
 566 
 567 void emit_couple(bufwriter* w, uint32_t v) {
 568     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 569     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 570     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 571     write_byte(w, '=');
 572 }
 573 
 574 void emit_single(bufwriter* w, uint32_t v) {
 575     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 576     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 577     write_byte(w, '=');
 578     write_byte(w, '=');
 579 }
 580 
 581 bool handle_reader(bufwriter* w, FILE* src, const char* path) {
 582     unsigned char buf[IBUF_SIZE];
 583     uint64_t bytes = 0;
 584 
 585     // triple holds groups of 3 bytes at once, which is required by base64,
 586     // except for the last few bytes of input, which are padded with equals
 587     unsigned char triple[3];
 588     triple[0] = 0;
 589     triple[1] = 0;
 590     triple[2] = 0;
 591 
 592     while (!feof(w->out)) {
 593         const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 594         if (n < 1) {
 595             // assume input is over when no bytes were read
 596             break;
 597         }
 598 
 599         if (bytes == 0 && !start_data_uri(w, buf, n)) {
 600             write_byte(w, '\n');
 601             flush(w);
 602             const char* msg = "can't auto-detect MIME type for";
 603             fprintf(stderr, ERROR_LINE("%s %s"), msg, path);
 604             return false;
 605         }
 606 
 607         // unrolling loop doesn't seem to outperform compiling this with -O3
 608         size_t where = bytes % 3;
 609         for (size_t i = 0; i < n; i++) {
 610             triple[where++] = buf[i];
 611             if (where == 3) {
 612                 emit_triple(w, combine_triple(triple));
 613                 where = 0;
 614             }
 615         }
 616         bytes += n;
 617     }
 618 
 619     // empty inputs result in empty outputs
 620     if (bytes == 0) {
 621         return true;
 622     }
 623 
 624     // don't forget unemitted trailing bytes, if any: these need special
 625     // handling, as they include `=` signs; if the input bytes were a
 626     // multiple of 3, there won't be any trailing bytes
 627     switch (bytes % 3) {
 628     case 1:
 629         triple[1] = 0;
 630         triple[2] = 0;
 631         emit_single(w, combine_triple(triple));
 632         break;
 633     case 2:
 634         triple[2] = 0;
 635         emit_couple(w, combine_triple(triple));
 636         break;
 637     }
 638 
 639     // end with a line-feed, so multiple input streams are each encoded in
 640     // their own line
 641     if (bytes > 0) {
 642         write_byte(w, '\n');
 643         flush(w);
 644     }
 645     return true;
 646 }
 647 
 648 // handle_file handles data from the filename given; returns false only when
 649 // an error happened
 650 bool handle_file(bufwriter* w, const char* path) {
 651     // a `-` filename stands for the standard input
 652     if (path[0] == '-' && path[1] == 0) {
 653         return handle_reader(w, stdin, stdin_name);
 654     }
 655 
 656     FILE* f = fopen(path, "rb");
 657     if (f == NULL) {
 658         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 659         return false;
 660     }
 661 
 662     const bool ok = handle_reader(w, f, path);
 663     fclose(f);
 664     return ok;
 665 }
 666 
 667 // is_help_option simplifies control-flow for func run
 668 bool is_help_option(const char* s) {
 669     return s[0] == '-' && (
 670         strcmp(s, "-h") == 0 ||
 671         strcmp(s, "-help") == 0 ||
 672         strcmp(s, "--h") == 0 ||
 673         strcmp(s, "--help") == 0
 674     );
 675 }
 676 
 677 // is_fallback_option simplifies control-flow for func run
 678 bool is_fallback_option(const char* s) {
 679     return s[0] == '-' && (
 680         strcmp(s, "-f") == 0 ||
 681         strcmp(s, "-fallback") == 0 ||
 682         strcmp(s, "--f") == 0 ||
 683         strcmp(s, "--fallback") == 0 ||
 684         strcmp(s, "-m") == 0 ||
 685         strcmp(s, "-mime") == 0 ||
 686         strcmp(s, "--m") == 0 ||
 687         strcmp(s, "--mime") == 0 ||
 688         strcmp(s, "-t") == 0 ||
 689         strcmp(s, "-type") == 0 ||
 690         strcmp(s, "--t") == 0 ||
 691         strcmp(s, "--type") == 0
 692     );
 693 }
 694 
 695 const char* fallback_aliases[196] = {
 696     // tiny shortcuts
 697     "b", octet,
 698     "j", json,
 699     "t", text,
 700     "u", utf8,
 701 
 702     // failure fallbacks
 703     "e", "",
 704     "err", "",
 705     "error", "",
 706     "f", "",
 707     "fail", "",
 708 
 709     // common mistakes
 710     "text/json", json,
 711 
 712     // other
 713     "", default_mime_fallback,
 714     "default", default_mime_fallback,
 715 
 716     "aif", aiff,
 717     "aiff", aiff,
 718     "au", au,
 719     "avi", avi,
 720     "avif", avif,
 721     "bmp", bmp,
 722     "caf", caf,
 723     "cur", cur,
 724     "css", css,
 725     "csv", csv,
 726     "djvu", djvu,
 727     "elf", elf,
 728     "exe", exe,
 729     "flac", flac,
 730     "gif", gif,
 731     "gz", gz,
 732     "heic", heic,
 733     "html", html,
 734     "ico", ico,
 735     "iso", iso,
 736     "jpg", jpeg,
 737     "jpeg", jpeg,
 738     "js", js,
 739     "json", json,
 740     "m4a", m4a,
 741     "m4v", m4v,
 742     "midi", midi,
 743     "mov", mov,
 744     "mp4", mp4,
 745     "mp3", mp3,
 746     "mpeg", mpg,
 747     "ogg", ogg,
 748     "opus", opus,
 749     "pdf", pdf,
 750     "png", png,
 751     "ps", ps,
 752     "psd", psd,
 753     "rtf", rtf,
 754     "sqlite3", sqlite3,
 755     "svg", svg,
 756     "text", text,
 757     "tiff", tiff,
 758     "tsv", tsv,
 759     "wasm", wasm,
 760     "wav", wav,
 761     "webp", webp,
 762     "webm", webm,
 763     "xml", xml,
 764     "zip", zip,
 765     "zst", zst,
 766 
 767     // longer shortcuts
 768     "aac", m4a,
 769     "aif", aiff,
 770     "bin", octet,
 771     "binary", octet,
 772     "bits", octet,
 773     "gzip", gz,
 774     "htm", htm,
 775     "mid", midi,
 776     "mpg", mpg,
 777     "octet", octet,
 778     "octets", octet,
 779     "octetstream", octet,
 780     "octet-stream", octet,
 781     "plain", text,
 782     "sqlite", sqlite3,
 783     "svg+xml", svg,
 784     "tif", tiff,
 785     "utf8", utf8,
 786     "utf-8", utf8,
 787     "xbmp", bmp,
 788     "xcaf", caf,
 789     "xflac", flac,
 790     "xicon", ico,
 791     "xm4v", m4v,
 792     "xsqlite3", sqlite3,
 793     "xwav", wav,
 794     "xwave", wav,
 795     "x-bmp", bmp,
 796     "x-caf", caf,
 797     "x-flac", flac,
 798     "x-icon", ico,
 799     "x-m4v", m4v,
 800     "x-sqlite3", sqlite3,
 801     "x-wav", wav,
 802     "wave", wav,
 803     "zstd", zst,
 804 };
 805 
 806 const char* resolve_alias(const char* name) {
 807     const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
 808     for (size_t i = 0; i < n; i += 2) {
 809         if (strcmp(name, fallback_aliases[i]) == 0) {
 810             return fallback_aliases[i + 1];
 811         }
 812     }
 813     return name;
 814 }
 815 
 816 // run returns the number of errors
 817 int run(int argc, char** argv, FILE* w) {
 818     unsigned char outbuf[OBUF_SIZE];
 819     bufwriter bw;
 820     init_bufwriter(&bw, w, outbuf, sizeof(outbuf));
 821 
 822     size_t files = 0;
 823     size_t errors = 0;
 824     bool change_fallback = false;
 825     bool options = true;
 826 
 827     // handle all filenames given
 828     for (size_t i = 1; i < argc && !feof(w); i++) {
 829         if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0) {
 830             options = false;
 831             continue;
 832         }
 833 
 834         if (change_fallback) {
 835             fallback_mime_type = resolve_alias(argv[i]);
 836             change_fallback = false;
 837             continue;
 838         }
 839 
 840         if (options && is_fallback_option(argv[i])) {
 841             change_fallback = true;
 842             continue;
 843         }
 844 
 845         if (!handle_file(&bw, argv[i])) {
 846             errors++;
 847         }
 848         files++;
 849     }
 850 
 851     if (change_fallback) {
 852         flush(&bw);
 853         fprintf(stderr, ERROR_LINE("forgot new fallback MIME-type"));
 854         errors++;
 855         return errors;
 856     }
 857 
 858     // no filenames means use stdin as the only input
 859     if (files == 0) {
 860         if (!handle_reader(&bw, stdin, stdin_name)) {
 861             errors++;
 862         }
 863     }
 864 
 865     flush(&bw);
 866     return errors;
 867 }
 868 
 869 int main(int argc, char** argv) {
 870 #ifdef _WIN32
 871     setmode(fileno(stdin), O_BINARY);
 872     // ensure output lines end in LF instead of CRLF on windows
 873     setmode(fileno(stdout), O_BINARY);
 874     setmode(fileno(stderr), O_BINARY);
 875 #endif
 876 
 877     if (argc > 1 && is_help_option(argv[1])) {
 878         printf("%s", info);
 879         return 0;
 880     }
 881 
 882     // fill entries in the type-detection dispatch table
 883     memset(hdr_dispatch, 0, sizeof(hdr_dispatch));
 884     hdr_dispatch[0] = hdr_dispatch_0; // 0
 885     hdr_dispatch[26] = hdr_dispatch_26; // 26
 886     hdr_dispatch[31] = hdr_dispatch_31; // 31
 887     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 888     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 889     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 890     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 891     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 892     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 893     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 894     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 895     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 896     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 897     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 898     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 899     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 900     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 901     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 902     // hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 903     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 904     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 905     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 906     hdr_dispatch[127] = hdr_dispatch_127; // 127
 907     hdr_dispatch[137] = hdr_dispatch_137; // 137
 908     hdr_dispatch[255] = hdr_dispatch_255; // 255
 909 
 910     return run(argc, argv, stdout) == 0 ? 0 : 1;
 911 }