File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./datauri ./datauri.c
  29 */
  30 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif
  39 
  40 #ifdef RED_ERRORS
  41 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  42 #ifdef __APPLE__
  43 #define ERROR_STYLE "\x1b[31m"
  44 #endif
  45 #define RESET_STYLE "\x1b[0m"
  46 #else
  47 #define ERROR_STYLE
  48 #define RESET_STYLE
  49 #endif
  50 
  51 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  52 
  53 #ifndef IBUF_SIZE
  54 #define IBUF_SIZE (32 * 1024)
  55 #endif
  56 
  57 #ifndef OBUF_SIZE
  58 #define OBUF_SIZE (8 * 1024)
  59 #endif
  60 
  61 const char* info = ""
  62 "datauri [options...] [filenames...]\n"
  63 "\n"
  64 "\n"
  65 "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
  66 "few bytes from each data/file stream. When given multiple inputs, the output\n"
  67 "will be multiple lines, one for each file given.\n"
  68 "\n"
  69 "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
  70 "standard-input, which is also used automatically when not given any files.\n"
  71 "\n"
  72 "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
  73 "include their payload's MIME-type, and which are directly useable/shareable\n"
  74 "in web-browsers as links, despite not looking like normal links/URIs.\n"
  75 "\n"
  76 "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
  77 "\n"
  78 "\n"
  79 "Options\n"
  80 "\n"
  81 "    -h, -help, --h, --help              show this help message\n"
  82 "    -f, -fallback, --f, --fallback      change the fallback MIME type\n"
  83 "";
  84 
// stdin_name is the name given to the standard input, which is what error
// messages show instead of a file path
const char* stdin_name = "<stdin>";

// octet is the MIME-type string used as the initial fallback
#define octet "application/octet-stream"

// fallback_mime_type is what guess_mime results in when no header pattern
// matches the data; func run changes it when given a fallback option
const char* fallback_mime_type = octet;
  90 
// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; bytes pile up in
// the buffer, and only reach the destination when the buffer is full, or
// when func flush is called
typedef struct bufwriter {
    // buf is the buffer proper: it's provided by the caller of
    // init_bufwriter, and is never allocated/freed by the writer itself
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;
} bufwriter;
 106 
 107 void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
 108     w->buf = b;
 109     w->len = 0;
 110     w->cap = cap;
 111     w->out = out;
 112 }
 113 
 114 static inline void write_byte(bufwriter* w, unsigned char b) {
 115     if (w->len < w->cap) {
 116         w->buf[w->len++] = b;
 117         return;
 118     }
 119 
 120     fwrite(w->buf, 1, w->cap, w->out);
 121     w->buf[0] = b;
 122     w->len = 1;
 123 }
 124 
 125 void write_string(bufwriter* w, const char* s) {
 126     for (; *s != 0; s++) {
 127         write_byte(w, *s);
 128     }
 129 }
 130 
 131 void flush(bufwriter* w) {
 132     if (w->len > 0) {
 133         fwrite(w->buf, 1, w->len, w->out);
 134     }
 135     w->len = 0;
 136     fflush(w->out);
 137 }
 138 
 139 // can be anything: ensure this value differs from all other literal bytes
 140 // in the generic-headers table: failing that, its value could cause subtle
 141 // type-misdetection bugs; the value is chosen to be `obviously` findable
 142 // in the source, which also implies a constant beyond the ascii range, as
 143 // ascii char-constants are also used in the tables
 144 const unsigned char cba = 0xfd; // 253
 145 
// MIME-type strings, each named after a common filename extension; these are
// plain text-substitution macros, which means none of these short lowercase
// names can be used as identifiers anywhere after this point in the file
#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define midi "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
// NOTE(review): the registered type for tab-separated values seems to be
// `text/tab-separated-values` - confirm before relying on `text/tsv`
#define tsv "text/tsv"
#define utf8 "text/plain; charset=UTF-8"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"
 196 
// format_descriptor ties a file-header pattern to its data-format type;
// the 15-byte header-limit nicely aligns with the 1-byte length before it
typedef struct format_descriptor {
    // header_length is how many of the header bytes are part of the pattern;
    // a length of 0 marks the end of a table of descriptors
    unsigned char header_length;

    // header_bytes is the pattern proper, where bytes with the `can be
    // anything` value match any byte
    unsigned char header_bytes[15];

    // mime is the MIME type for data starting with the pattern
    const char* mime;
} format_descriptor;
 204 
 205 // starts_as tries to match header data to the pattern given: this includes
 206 // allowing `any byte` when the pattern indicates so, using a value reserved
 207 // for that purpose
 208 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
 209     // when header data aren't enough for a pattern, there's no match
 210     if (xlen < ylen) {
 211         return false;
 212     }
 213 
 214     for (size_t i = 0; i < ylen; i++) {
 215         if (y[i] == cba) {
 216             // `can be anything` value always matches
 217             continue;
 218         }
 219 
 220         if (x[i] != y[i]) {
 221             return false;
 222         }
 223     }
 224 
 225     return true;
 226 }
 227 
// start_format_descriptor expands into the first 2 fields of a
// format_descriptor initializer, counting the header bytes given so the
// length doesn't have to be given by hand; nothing in this file uses it
//
// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }
 232 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
//
// all patterns here skip the first 4 bytes, then expect `ftyp` followed by
// a 4-byte brand; func guess_mime returns the first entry to match, and the
// table ends with an entry whose pattern-length is 0
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    // the 24-byte pattern below can't fit the 15-byte limit of descriptors:
    // func guess_mime checks it on its own
    // {
    //     24,
    //     {
    //         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
    //         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
    //     },
    //     m4a,
    // },
    {0},
};
 256 
// the tables which follow are each named after the decimal value of the
// first byte all their patterns share; in each table the first pattern to
// match wins, and the last entry always has a pattern-length of 0, which
// marks the end of the table

// patterns starting with a null byte
format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

// patterns starting with `#`
format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

// patterns starting with `%`
format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

// patterns starting with `(`
format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

// patterns starting with `.`
format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

// patterns starting with `8`
format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

// patterns starting with `<`; all of these are case-sensitive, and none
// allow anything before them, not even spaces
format_descriptor hdr_dispatch_60[] = {
    {14, "<!DOCTYPE html", html},
    {4, "<svg", svg},
    {5, "<html", html},
    {5, "<head", html},
    {5, "<body", html},
    {5, "<?xml", xml},
    {0},
};

// patterns starting with `A`
format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

// patterns starting with `B`
format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

// patterns starting with `F`
format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

// patterns starting with `G`
format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

// patterns starting with `I`
format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

// patterns starting with `M`
format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", midi},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

// patterns starting with `O`
format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

// patterns starting with `P`
format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

// patterns starting with `R`
format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

// patterns starting with `S`: this table has no patterns, since the 16-byte
// sqlite3 header can't fit the 15-byte limit of descriptors, so func
// guess_mime checks it on its own
format_descriptor hdr_dispatch_83[] = {
    // {16, "SQLite format 3\x00", sqlite3},
    {0},
};

// patterns starting with `c`
format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

// patterns starting with `f`
format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};
 399 
 400 format_descriptor hdr_dispatch_123[] = {
 401     {4, "{\\rtf", rtf},
 402     {0},
 403 };
 404 
// patterns starting with byte 127; as with all the other per-byte tables,
// the last entry has a pattern-length of 0, which marks the table's end
format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

// patterns starting with byte 137 (0x89)
format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

// patterns starting with byte 255 (0xFF); the first pattern to match wins
format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpeg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
 421 
 422 // hdr_dispatch groups format-description-groups by their first byte, thus
 423 // shortening total lookups for some data header
 424 //
 425 // notice how the `ftyp` data formats aren't handled here, since these can
 426 // start with any byte, instead of the literal value of the any-byte markers
 427 // they use
 428 //
 429 // all entries are arrays which must always end with a special entry whose
 430 // pattern-length is declared to be 0, since there's no explicit way to know
 431 // the length of these arrays when looping on them
 432 //
 433 // all non-null entries are setup explicitly, later in the code
 434 format_descriptor* hdr_dispatch[256] = {
 435     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 436     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 437     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 438     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 439     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 440     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 441     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 442     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 443     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 444     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 445     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 446     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 447     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 448     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 449     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 450     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 451     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 452     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 453     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 454     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 455     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 456     NULL, NULL, NULL, NULL,
 457 };
 458 
 459 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 460 // using the lookup-tables
 461 const char* guess_mime(const unsigned char* buf, size_t len) {
 462     if (len == 0) {
 463         return NULL;
 464     }
 465 
 466     // just in case, start with the patterns which allow any first byte
 467     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 468         const unsigned char* hb = special_headers[i].header_bytes;
 469         const size_t hl = special_headers[i].header_length;
 470         if (starts_as(buf, len, hb, hl)) {
 471             return special_headers[i].mime;
 472         }
 473     }
 474 
 475     // the m4a-dash header exceeds the 15-byte limit of the lookup tables
 476     const uint8_t header1[24] = {
 477         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
 478         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
 479     };
 480     if (starts_as(buf, len, header1, sizeof(header1))) {
 481         return m4a;
 482     }
 483 
 484     // the sqlite3 header exceeds the 15-byte limit of the lookup tables
 485     const uint8_t header2[16] = "SQLite format 3\x00";
 486     if (starts_as(buf, len, header2, sizeof(header2))) {
 487         return sqlite3;
 488     }
 489 
 490     format_descriptor* guesses = hdr_dispatch[buf[0]];
 491     if (guesses == NULL) {
 492         return fallback_mime_type;
 493     }
 494 
 495     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 496         const unsigned char* hb = guesses[i].header_bytes;
 497         const size_t hl = guesses[i].header_length;
 498         if (starts_as(buf, len, hb, hl)) {
 499             return guesses[i].mime;
 500         }
 501     }
 502 
 503     return fallback_mime_type;
 504 }
 505 
 506 bool is_mime_disabled(const char* mime) {
 507     return (mime != NULL) && (mime[0] == 'n') && (
 508         strcmp(mime, "no") == 0 ||
 509         strcmp(mime, "nomime") == 0 ||
 510         strcmp(mime, "no-mime") == 0 ||
 511         strcmp(mime, "none") == 0 ||
 512         strcmp(mime, "not") == 0
 513     );
 514 }
 515 
 516 // start_data_uri starts the output by declaring the data-URI to be an
 517 // auto-detected MIME-type; the return value is the auto-detection success
 518 bool start_data_uri(bufwriter* w, const unsigned char* buf, size_t len) {
 519     const char* mime = guess_mime(buf, len);
 520     if (is_mime_disabled(mime)) {
 521         return true;
 522     }
 523     if (mime == NULL || mime[0] == 0) {
 524         return false;
 525     }
 526 
 527     write_string(w, "data:");
 528     for (size_t i = 0; mime[i] != 0; i++) {
 529         write_byte(w, mime[i]);
 530     }
 531     write_string(w, ";base64,");
 532     return true;
 533 }
 534 
 535 const unsigned char base64_lookup[64] =
 536     ""
 537     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 538     "";
 539 
 540 static inline uint32_t combine_triple(const unsigned char data[4]) {
 541     return (data[0] << 16) | (data[1] << 8) | (data[2] << 0);
 542 }
 543 
 544 static inline void emit_triple(bufwriter* w, uint32_t v) {
 545     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 546     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 547     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 548     write_byte(w, base64_lookup[0x3f & (v >> 0)]);
 549 }
 550 
 551 void emit_couple(bufwriter* w, uint32_t v) {
 552     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 553     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 554     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 555     write_byte(w, '=');
 556 }
 557 
 558 void emit_single(bufwriter* w, uint32_t v) {
 559     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 560     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 561     write_byte(w, '=');
 562     write_byte(w, '=');
 563 }
 564 
 565 bool handle_reader(bufwriter* w, FILE* src, const char* path) {
 566     unsigned char buf[IBUF_SIZE];
 567     uint64_t bytes = 0;
 568 
 569     // triple holds groups of 3 bytes at once, which is required by base64,
 570     // except for the last few bytes of input, which are padded with equals;
 571     // the 4th item is never used, but having it aligns things to 32 bits
 572     unsigned char triple[4];
 573     triple[0] = 0;
 574     triple[1] = 0;
 575     triple[2] = 0;
 576     triple[3] = 0;
 577 
 578     while (!feof(w->out)) {
 579         const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 580         if (n < 1) {
 581             // assume input is over when no bytes were read
 582             break;
 583         }
 584 
 585         if (bytes == 0 && !start_data_uri(w, buf, n)) {
 586             write_byte(w, '\n');
 587             flush(w);
 588             const char* msg = "can't auto-detect MIME type for";
 589             fprintf(stderr, ERROR_LINE("%s %s"), msg, path);
 590             return false;
 591         }
 592 
 593         size_t where = bytes % 3;
 594         for (size_t i = 0; i < n; i++, bytes++) {
 595             triple[where++] = buf[i];
 596             if (where == 3) {
 597                 emit_triple(w, combine_triple(triple));
 598                 where = 0;
 599             }
 600         }
 601     }
 602 
 603     // empty inputs result in empty outputs
 604     if (bytes == 0) {
 605         return true;
 606     }
 607 
 608     // don't forget unemitted trailing bytes, if any: these need special
 609     // handling, as they include `=` signs; if the input bytes were a
 610     // multiple of 3, there won't be any trailing bytes
 611     switch (bytes % 3) {
 612     case 1:
 613         triple[1] = 0;
 614         triple[2] = 0;
 615         emit_single(w, combine_triple(triple));
 616         break;
 617     case 2:
 618         triple[2] = 0;
 619         emit_couple(w, combine_triple(triple));
 620         break;
 621     }
 622 
 623     // end with a line-feed, so multiple input streams are each encoded in
 624     // their own line
 625     if (bytes > 0) {
 626         write_byte(w, '\n');
 627         flush(w);
 628     }
 629     return true;
 630 }
 631 
 632 // handle_file handles data from the filename given; returns false only when
 633 // an error happened
 634 bool handle_file(bufwriter* w, const char* path) {
 635     // a `-` filename stands for the standard input
 636     if (path[0] == '-' && path[1] == 0) {
 637         return handle_reader(w, stdin, stdin_name);
 638     }
 639 
 640     FILE* f = fopen(path, "rb");
 641     if (f == NULL) {
 642         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 643         return false;
 644     }
 645 
 646     const bool ok = handle_reader(w, f, path);
 647     fclose(f);
 648     return ok;
 649 }
 650 
 651 // is_help_option simplifies control-flow for func run
 652 bool is_help_option(const char* s) {
 653     return s[0] == '-' && (
 654         strcmp(s, "-h") == 0 ||
 655         strcmp(s, "-help") == 0 ||
 656         strcmp(s, "--h") == 0 ||
 657         strcmp(s, "--help") == 0
 658     );
 659 }
 660 
 661 // is_fallback_option simplifies control-flow for func run
 662 bool is_fallback_option(const char* s) {
 663     return s[0] == '-' && (
 664            strcmp(s, "-f") == 0 ||
 665            strcmp(s, "-fallback") == 0 ||
 666            strcmp(s, "--f") == 0 ||
 667            strcmp(s, "--fallback") == 0
 668     );
 669 }
 670 
 671 const char* fallback_aliases[192] = {
 672     // tiny shortcuts
 673     "b", octet,
 674     "j", json,
 675     "t", text,
 676     "u", utf8,
 677 
 678     // failure fallbacks
 679     "e", "",
 680     "err", "",
 681     "error", "",
 682     "f", "",
 683     "fail", "",
 684 
 685     // common mistakes
 686     "text/json", json,
 687 
 688     "aif", aiff,
 689     "aiff", aiff,
 690     "au", au,
 691     "avi", avi,
 692     "avif", avif,
 693     "bmp", bmp,
 694     "caf", caf,
 695     "cur", cur,
 696     "css", css,
 697     "csv", csv,
 698     "djvu", djvu,
 699     "elf", elf,
 700     "exe", exe,
 701     "flac", flac,
 702     "gif", gif,
 703     "gz", gz,
 704     "heic", heic,
 705     "html", html,
 706     "ico", ico,
 707     "iso", iso,
 708     "jpg", jpeg,
 709     "jpeg", jpeg,
 710     "js", js,
 711     "json", json,
 712     "m4a", m4a,
 713     "m4v", m4v,
 714     "midi", midi,
 715     "mov", mov,
 716     "mp4", mp4,
 717     "mp3", mp3,
 718     "mpeg", mpg,
 719     "ogg", ogg,
 720     "opus", opus,
 721     "pdf", pdf,
 722     "png", png,
 723     "ps", ps,
 724     "psd", psd,
 725     "rtf", rtf,
 726     "sqlite3", sqlite3,
 727     "svg", svg,
 728     "text", text,
 729     "tiff", tiff,
 730     "tsv", tsv,
 731     "wasm", wasm,
 732     "wav", wav,
 733     "webp", webp,
 734     "webm", webm,
 735     "xml", xml,
 736     "zip", zip,
 737     "zst", zst,
 738 
 739     // longer shortcuts
 740     "aac", m4a,
 741     "aif", aiff,
 742     "bin", octet,
 743     "binary", octet,
 744     "bits", octet,
 745     "gzip", gz,
 746     "htm", htm,
 747     "mid", midi,
 748     "mpg", mpg,
 749     "octet", octet,
 750     "octets", octet,
 751     "octetstream", octet,
 752     "octet-stream", octet,
 753     "plain", text,
 754     "sqlite", sqlite3,
 755     "svg+xml", svg,
 756     "tif", tiff,
 757     "utf8", utf8,
 758     "utf-8", utf8,
 759     "xbmp", bmp,
 760     "xcaf", caf,
 761     "xflac", flac,
 762     "xicon", ico,
 763     "xm4v", m4v,
 764     "xsqlite3", sqlite3,
 765     "xwav", wav,
 766     "xwave", wav,
 767     "x-bmp", bmp,
 768     "x-caf", caf,
 769     "x-flac", flac,
 770     "x-icon", ico,
 771     "x-m4v", m4v,
 772     "x-sqlite3", sqlite3,
 773     "x-wav", wav,
 774     "wave", wav,
 775     "zstd", zst,
 776 };
 777 
 778 const char* resolve_alias(const char* name) {
 779     const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
 780     for (size_t i = 0; i < n; i += 2) {
 781         if (strcmp(name, fallback_aliases[i]) == 0) {
 782             return fallback_aliases[i + 1];
 783         }
 784     }
 785     return name;
 786 }
 787 
 788 // run returns the number of errors
 789 int run(int argc, char** argv, FILE* w) {
 790     unsigned char outbuf[OBUF_SIZE];
 791     bufwriter bw;
 792     init_bufwriter(&bw, w, outbuf, sizeof(outbuf));
 793 
 794     size_t files = 0;
 795     size_t errors = 0;
 796     bool change_fallback = false;
 797 
 798     // handle all filenames given
 799     for (size_t i = 1; i < argc && !feof(w); i++) {
 800         if (change_fallback) {
 801             fallback_mime_type = resolve_alias(argv[i]);
 802             change_fallback = false;
 803             continue;
 804         }
 805 
 806         if (is_fallback_option(argv[i])) {
 807             change_fallback = true;
 808             continue;
 809         }
 810 
 811         if (!handle_file(&bw, argv[i])) {
 812             errors++;
 813         }
 814         files++;
 815     }
 816 
 817     if (change_fallback) {
 818         flush(&bw);
 819         fprintf(stderr, ERROR_LINE("forgot new fallback MIME-type"));
 820         errors++;
 821         return errors;
 822     }
 823 
 824     // no filenames means use stdin as the only input
 825     if (files == 0) {
 826         if (!handle_reader(&bw, stdin, stdin_name)) {
 827             errors++;
 828         }
 829     }
 830 
 831     flush(&bw);
 832     return errors;
 833 }
 834 
 835 int main(int argc, char** argv) {
 836 #ifdef _WIN32
 837     setmode(fileno(stdin), O_BINARY);
 838     // ensure output lines end in LF instead of CRLF on windows
 839     setmode(fileno(stdout), O_BINARY);
 840     setmode(fileno(stderr), O_BINARY);
 841 #endif
 842 
 843     if (argc > 1 && is_help_option(argv[1])) {
 844         printf("%s", info);
 845         return 0;
 846     }
 847 
 848     // fill entries in the type-detection dispatch table
 849     hdr_dispatch[0] = hdr_dispatch_0; // 0
 850     hdr_dispatch[26] = hdr_dispatch_26; // 26
 851     hdr_dispatch[31] = hdr_dispatch_31; // 31
 852     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 853     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 854     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 855     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 856     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 857     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 858     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 859     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 860     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 861     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 862     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 863     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 864     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 865     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 866     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 867     hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 868     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 869     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 870     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 871     hdr_dispatch[127] = hdr_dispatch_127; // 127
 872     hdr_dispatch[137] = hdr_dispatch_137; // 137
 873     hdr_dispatch[255] = hdr_dispatch_255; // 255
 874 
 875     return run(argc, argv, stdout) == 0 ? 0 : 1;
 876 }