File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27     cc -Wall -s -O2 -o ./datauri ./datauri.c
  28 */
  29 
  30 #include <fcntl.h>
  31 #include <math.h>
  32 #include <stdbool.h>
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <windows.h>
  40 #endif
  41 
// info is the help message: func run prints it, then quits, when any of
// the help options is among the command-line arguments
//
// NOTE(review): the message says empty inputs result in empty lines, while
// func handle_reader emits nothing at all for them: confirm which is meant
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    "    -h, -help, --h, --help              show this help message\n"
    "    -f, -fallback, --f, --fallback      change the fallback MIME type\n"
    "";
  66 
// stdin_name is the name used for the standard input in error messages
const char* stdin_name = "<stdin>";

// fallback_mime_type is the MIME type func guess_mime gives when no header
// pattern matches; func run changes it when given the fallback option
const char* fallback_mime_type = "application/octet-stream";
  70 
// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended; the buffer's
// memory is given by the caller of init_bufwriter, and isn't owned by it
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used; func
    // flush resets it to 0
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination:
    // it's set when a call to fwrite fails
    bool done;
} bufwriter;
  89 
  90 // init_bufwriter is the constructor for type bufwriter
  91 void init_bufwriter(bufwriter* w, FILE* dst, unsigned char* buf, size_t cap) {
  92     w->buf = buf;
  93     w->len = 0;
  94     w->cap = cap;
  95     w->out = dst;
  96     w->done = false;
  97 }
  98 
  99 // flush does as it says: it empties the buffer after ensuring its bytes end
 100 // on their intended destination
 101 void flush(bufwriter* w) {
 102     if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
 103         w->done = true;
 104     }
 105     w->len = 0;
 106 }
 107 
 108 // write_bytes does as it says, minimizing the number of calls to fwrite
 109 void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
 110     if (w->len + len < w->cap) {
 111         // all bytes fit into buffer
 112         memcpy(w->buf + w->len, src, len);
 113         w->len += len;
 114         return;
 115     }
 116 
 117     // ensure current buffer bytes go out, before crossing strides
 118     flush(w);
 119 
 120     // emit all chunks striding beyond/at the buffer's capacity
 121     for (; len >= w->cap; src += w->cap, len -= w->cap) {
 122         if (fwrite(src, w->cap, 1, w->out) < 1) {
 123             w->done = true;
 124             return;
 125         }
 126     }
 127 
 128     // now all, if any, remaining bytes will fit into the buffer
 129     memcpy(w->buf, src, len);
 130     w->len += len;
 131 }
 132 
 133 // write_byte does as it says
 134 void write_byte(bufwriter* w, unsigned char b) {
 135     if (w->len >= w->cap) {
 136         flush(w);
 137     }
 138     w->buf[w->len] = b;
 139     w->len++;
 140 }
 141 
 142 // EMIT_CONST abstracts a common use-case of the bufwriter, which is
 143 // emitting string constants without their final null byte
 144 #define EMIT_CONST(w, x) write_bytes(w, (unsigned char*)x, sizeof(x) - 1)
 145 
 146 // can be anything: ensure this value differs from all other literal bytes
 147 // in the generic-headers table: failing that, its value could cause subtle
 148 // type-misdetection bugs; the value is chosen to be `obviously` findable
 149 // in the source, which also implies a constant beyond the ascii range, as
 150 // ascii char-constants are also used in the tables
 151 const unsigned char cba = 0xfd; // 253
 152 
// MIME types, each named after a common filename extension for it: these
// are meant for the `mime` fields of the header-pattern tables
//
// NOTE: being plain lowercase macros, from this point on any identifier
// spelled exactly like one of these (`text`, `cur`, `mid`, `ps`, `zip`, ...)
// is replaced by a string literal: avoid all these names for variables,
// funcs, params, and struct fields
#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpg "image/jpeg"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define mid "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"
 203 
// format_descriptor ties a file-header pattern to its data-format type
typedef struct format_descriptor {
    // header_length is how many of the header_bytes make up the pattern;
    // a length of 0 marks the end of a table of descriptors
    unsigned char header_length;

    // header_bytes is the pattern itself, where the any-byte marker `cba`
    // matches all byte values; patterns can't be longer than 24 bytes
    unsigned char header_bytes[24];

    // mime is the MIME type to use when the pattern matches
    const char* mime;
} format_descriptor;
 210 
 211 // starts_as tries to match header data to the pattern given: this includes
 212 // allowing `any byte` when the pattern indicates so, using a value reserved
 213 // for that purpose
 214 bool starts_as(unsigned char* x, size_t xlen, unsigned char* y, size_t ylen) {
 215     // when header data aren't enough for a pattern, there's no match
 216     if (xlen < ylen) {
 217         return false;
 218     }
 219 
 220     for (size_t i = 0; i < xlen; i++) {
 221         if (y[i] == cba) {
 222             // `can be anything` value always matches
 223             continue;
 224         }
 225 
 226         if (x[i] != y[i]) {
 227             return false;
 228         }
 229     }
 230 
 231     return true;
 232 }
 233 
 234 // wrapper func to make func `starts_as` harder to miscall
 235 inline bool match_header(unsigned char* d, size_t len, format_descriptor* to) {
 236     return starts_as(d, len, to->header_bytes, to->header_length);
 237 }
 238 
// start_format_descriptor expands into the first 2 fields of a descriptor
// entry, counting the pattern bytes given instead of needing their count
// given explicitly; it's meant for lists of byte values, as a string
// literal would also have its final null byte counted
//
// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }
 243 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
//
// all these patterns have 4 any-byte markers, followed by `ftyp` and a
// format-specific name; func guess_mime tries them in the order given, and
// stops at the final entry, whose pattern-length is 0
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    {
        24,
        {
            cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
            000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
        },
        m4a,
    },
    {0},
};
 267 
// patterns for data whose first byte is 0: MPEG video, icons, cursors, and
// wasm; the entry with a pattern-length of 0 marks the end of the table
format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};
 276 
// patterns for data whose first byte is 0x1A (26): webm video
format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};
 281 
// patterns for data whose first byte is 0x1F (31): gzip; only the first 3
// bytes are checked, the stricter 4-byte pattern being disabled
format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};
 287 
// patterns for data whose first byte is `#` (35): data starting with `#!`,
// followed either by a space or a slash, are handled as plain-text
format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};
 293 
// patterns for data whose first byte is `%` (37): PDF and PostScript
format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};
 299 
// patterns for data whose first byte is `(` (40): zstd; since the last
// pattern byte is 0xFD, the any-byte marker must never have that value, or
// that byte would match anything, instead of being checked
format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};
 304 
// patterns for data whose first byte is `.` (46): au sounds
format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};
 309 
// patterns for data whose first byte is `8` (56): photoshop pictures
format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};
 314 
// patterns for data whose first byte is `<` (60): HTML, SVG, and XML; all
// comparisons are case-sensitive, and entries are tried in the order given
format_descriptor hdr_dispatch_60[] = {
    {14, "<!DOCTYPE html", html},
    {4, "<svg", svg},
    {5, "<html", html},
    {5, "<head", html},
    {5, "<body", html},
    {5, "<?xml", xml},
    {0},
};
 324 
// patterns for data whose first byte is `A` (65): djvu documents, whose
// pattern lets bytes 8 to 11 (counting from 0) have any value
format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};
 336 
// patterns for data whose first byte is `B` (66): bitmap pictures, which
// start with `BM`, and whose 15th byte must be 0x28, with the 12 bytes
// in between allowed to have any value
format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};
 348 
// patterns for data whose first byte is `F` (70): AIFF sounds, where the
// 4 bytes right after `FORM` can have any value
format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};
 354 
// patterns for data whose first byte is `G` (71): both GIF versions
format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};
 360 
// patterns for data whose first byte is `I` (73): MP3 sounds starting with
// ID3 metadata, and TIFF pictures; note the byte after `ID3` is a number
// from 2 to 4, not an ascii digit
format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};
 368 
// patterns for data whose first byte is `M` (77): TIFF pictures, MIDI
// files, and windows executables; the single pattern for the latter uses
// any-byte markers to cover all the disabled patterns right below it
format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", mid},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};
 378 
// patterns for data whose first byte is `O` (79): ogg sounds
format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};
 383 
// patterns for data whose first byte is `P` (80): zip archives
format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};
 388 
// patterns for data whose first byte is `R` (82): formats starting with
// `RIFF`, where the 4 bytes right after that can have any value, and the
// 4 bytes after those tell webp, wav, and avi apart
format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};
 395 
// patterns for data whose first byte is `S` (83): sqlite3 databases; the
// pattern-length of 16 includes the null byte right after the 15 letters
format_descriptor hdr_dispatch_83[] = {
    {16, "SQLite format 3\x00", sqlite3},
    {0},
};
 400 
// patterns for data whose first byte is `c` (99): caf sounds
format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};
 405 
// patterns for data whose first byte is `f` (102): flac sounds
format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};
 410 
 411 format_descriptor hdr_dispatch_123[] = {
 412     {4, "{\\rtf", rtf},
 413     {0},
 414 };
 415 
// patterns for data whose first byte is 0x7F (127): ELF binaries
format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};
 420 
// patterns for data whose first byte is 0x89 (137): PNG pictures
format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};
 425 
// patterns for data whose first byte is 0xFF (255): JPEG pictures and MP3
// sounds; entries are tried in the order given
format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
 432 
 433 // hdr_dispatch groups format-description-groups by their first byte, thus
 434 // shortening total lookups for some data header
 435 //
 436 // notice how the `ftyp` data formats aren't handled here, since these can
 437 // start with any byte, instead of the literal value of the any-byte markers
 438 // they use
 439 //
 440 // all entries are arrays which must always end with a special entry whose
 441 // pattern-length is declared to be 0, since there's no explicit way to know
 442 // the length of these arrays when looping on them
 443 //
 444 // all non-null entries are setup explicitly, later in the code
 445 format_descriptor* hdr_dispatch[256] = {
 446     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 447     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 448     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 449     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 450     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 451     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 452     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 453     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 454     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 455     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 456     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 457     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 458     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 459     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 460     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 461     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 462     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 463     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 464     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 465     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 466     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 467     NULL, NULL, NULL, NULL,
 468 };
 469 
 470 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 471 // using the lookup-tables
 472 const char* guess_mime(unsigned char* buf, size_t len) {
 473     if (len == 0) {
 474         return NULL;
 475     }
 476 
 477     // try the patterns which allow any bytes at the very start
 478     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 479         if (match_header(buf, len, &special_headers[i])) {
 480             return special_headers[i].mime;
 481         }
 482     }
 483 
 484     format_descriptor* guesses = hdr_dispatch[buf[0]];
 485     if (guesses == NULL) {
 486         return fallback_mime_type;
 487     }
 488 
 489     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 490         if (match_header(buf, len, &guesses[i])) {
 491             return guesses[i].mime;
 492         }
 493     }
 494     return fallback_mime_type;
 495 }
 496 
 497 bool is_mime_disabled(const char* mime) {
 498     return (mime[0] == 'n') && (
 499         strcmp(mime, "no") == 0 ||
 500         strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 ||
 501         strcmp(mime, "none") == 0 || strcmp(mime, "not")
 502     );
 503 }
 504 
 505 // start_data_uri starts the output by declaring the data-URI to be an
 506 // auto-detected MIME-type; the return value is the auto-detection success
 507 bool start_data_uri(bufwriter* w, unsigned char* buf, size_t len) {
 508     const char* mime = guess_mime(buf, len);
 509     if (is_mime_disabled(mime)) {
 510         return true;
 511     }
 512     if (mime == NULL || mime[0] == 0) {
 513         return false;
 514     }
 515 
 516     EMIT_CONST(w, "data:");
 517     for (size_t i = 0; mime[i] != 0; i++) {
 518         write_byte(w, mime[i]);
 519     }
 520     EMIT_CONST(w, ";base64,");
 521     return true;
 522 }
 523 
 524 const unsigned char base64_lookup[] =
 525     ""
 526     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 527     "";
 528 
 529 inline uint32_t combine_triple(unsigned char data[4]) {
 530     return (data[0] << 16) | (data[1] << 8) | data[2];
 531 }
 532 
 533 inline void emit_triple(bufwriter* w, uint32_t v) {
 534     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 535     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 536     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 537     write_byte(w, base64_lookup[0x3f & v]);
 538 }
 539 
 540 inline void emit_couple(bufwriter* w, uint32_t v) {
 541     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 542     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 543     write_byte(w, base64_lookup[0x3f & (v >> 6)]);
 544     write_byte(w, '=');
 545 }
 546 
 547 inline void emit_single(bufwriter* w, uint32_t v) {
 548     write_byte(w, base64_lookup[0x3f & (v >> 18)]);
 549     write_byte(w, base64_lookup[0x3f & (v >> 12)]);
 550     write_byte(w, '=');
 551     write_byte(w, '=');
 552 }
 553 
 554 bool handle_reader(bufwriter* w, FILE* src, const char* path) {
 555     // size of the input-buffer must be a multiple of 3
 556     unsigned char buf[48 * 1024];
 557     size_t chunks = 0;
 558     size_t where = 0;
 559     unsigned char triple[4];
 560 
 561     triple[0] = 0;
 562     triple[1] = 0;
 563     triple[2] = 0;
 564     triple[3] = 0;
 565 
 566     while (!w->done) {
 567         const size_t n = fread(&buf, sizeof(unsigned char), sizeof(buf), src);
 568         if (n < 1) {
 569             // assume input is over when no bytes were read
 570             break;
 571         }
 572 
 573         if (chunks == 0) {
 574             char* fmt = "\x1b[31mcan't auto-detect MIME type for %s\x1b[0m\n";
 575             if (!start_data_uri(w, buf, n)) {
 576                 write_byte(w, '\n');
 577                 flush(w);
 578                 fprintf(stderr, fmt, path);
 579                 return false;
 580             }
 581         }
 582         chunks++;
 583 
 584         for (size_t i = 0; i < n; i++) {
 585             triple[where] = buf[i];
 586             if (where < 2) {
 587                 where++;
 588             } else {
 589                 where = 0;
 590                 emit_triple(w, combine_triple(triple));
 591             }
 592         }
 593     }
 594 
 595     // empty inputs result in empty outputs
 596     if (chunks == 0) {
 597         return true;
 598     }
 599 
 600     // don't forget unemitted trailing bytes, if any: these need special
 601     // handling, as they include `=` signs; if the input bytes were a
 602     // multiple of 3, there won't be any trailing bytes
 603     switch (where) {
 604         case 1:
 605             triple[1] = 0;
 606             triple[2] = 0;
 607             emit_single(w, combine_triple(triple));
 608             break;
 609         case 2:
 610             triple[2] = 0;
 611             emit_couple(w, combine_triple(triple));
 612             break;
 613     }
 614 
 615     // end with a line-feed, so multiple input streams are each encoded in
 616     // their own line
 617     if (chunks > 0) {
 618         write_byte(w, '\n');
 619     }
 620     flush(w);
 621     return true;
 622 }
 623 
 624 // handle_file handles data from the filename given; returns false only when
 625 // an error happened
 626 bool handle_file(bufwriter* w, const char* path) {
 627     // a `-` filename stands for the standard input
 628     if (path[0] == '-' && path[1] == 0) {
 629         return handle_reader(w, stdin, stdin_name);
 630     }
 631 
 632     FILE* f = fopen(path, "rb");
 633     if (f == NULL) {
 634         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 635         return false;
 636     }
 637 
 638     const bool ok = handle_reader(w, f, path);
 639     fclose(f);
 640     return ok;
 641 }
 642 
 643 // is_help_option simplifies control-flow for func run
 644 bool is_help_option(char* s) {
 645     return s[0] == '-' && (
 646         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 647         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 648     );
 649 }
 650 
 651 // is_fallback_option simplifies control-flow for func run
 652 bool is_fallback_option(char* s) {
 653     return s[0] == '-' && (
 654            strcmp(s, "-f") == 0 || strcmp(s, "-fallback") == 0 ||
 655            strcmp(s, "--f") == 0 || strcmp(s, "--fallback") == 0
 656     );
 657 }
 658 
// fallback_aliases is a flat list of pairs, each alias being right before
// the MIME type it stands for: func adapt_fallback relies on this layout,
// so the number of items must always be even
//
// aliases standing for an empty MIME type are for making data of unknown
// type fail with an error, instead of using a fallback type for them
const char* fallback_aliases[] = {
    // "text/json", "application/json",

    // "xbmp", "image/x-bmp",
    // "xflac", "audio/x-flac",
    // "xicon", "image/x-icon",
    // "xm4v", "video/x-m4v",
    // "xsqlite3", "application/x-sqlite3",
    // "xwav", "audio/x-wav",
    // "xwave", "audio/x-wav",
    // "x-bmp", "image/x-bmp",
    // "x-flac", "audio/x-flac",
    // "x-icon", "image/x-icon",
    // "x-m4v", "video/x-m4v",
    // "x-sqlite3", "application/x-sqlite3",
    // "x-wav", "audio/x-wav",

    // single-letter shortcuts
    "b", "application/octet-stream",
    "j", "application/json",
    "t", "text/plain",
    "u", "text/plain; charset=UTF-8",

    // aliases to fail on data of unknown type
    "e", "",
    "err", "",
    "error", "",
    "f", "",
    "fail", "",

    // alternative names
    "aac", "audio/aac",
    "aif", "audio/aiff",
    "bin", "application/octet-stream",
    "binary", "application/octet-stream",
    "gzip", "application/gzip",
    "midi", "audio/midi",
    "mpeg", "video/mpeg",
    "octet", "application/octet-stream",
    "octetstream", "application/octet-stream",
    "octet-stream", "application/octet-stream",
    "plain", "text/plain",
    "sqlite", "application/x-sqlite3",
    "svg+xml", "image/svg+xml",
    "tif", "image/tiff",
    "utf8", "text/plain; charset=UTF-8",
    "utf-8", "text/plain; charset=UTF-8",
    "wave", "audio/x-wav",
    "zstd", "application/zstd",

    // the same names and values as the MIME-type macros
    "aiff", "audio/aiff",
    "au", "audio/basic",
    "avi", "video/avi",
    "avif", "image/avif",
    "bmp", "image/x-bmp",
    "caf", "audio/x-caf",
    "cur", "image/vnd.microsoft.icon",
    "css", "text/css",
    "csv", "text/csv",
    "djvu", "image/x-djvu",
    "elf", "application/x-elf",
    "exe", "application/vnd.microsoft.portable-executable",
    "flac", "audio/x-flac",
    "gif", "image/gif",
    "gz", "application/gzip",
    "heic", "image/heic",
    "htm", "text/html",
    "html", "text/html",
    "ico", "image/x-icon",
    "iso", "application/octet-stream",
    "jpg", "image/jpeg",
    "jpeg", "image/jpeg",
    "js", "application/javascript",
    "json", "application/json",
    "m4a", "audio/aac",
    "m4v", "video/x-m4v",
    "mid", "audio/midi",
    "mov", "video/quicktime",
    "mp4", "video/mp4",
    "mp3", "audio/mpeg",
    "mpg", "video/mpeg",
    "ogg", "audio/ogg",
    "opus", "audio/opus",
    "pdf", "application/pdf",
    "png", "image/png",
    "ps", "application/postscript",
    "psd", "image/vnd.adobe.photoshop",
    "rtf", "application/rtf",
    "sqlite3", "application/x-sqlite3",
    "svg", "image/svg+xml",
    "text", "text/plain",
    "tiff", "image/tiff",
    "tsv", "text/tsv",
    "wasm", "application/wasm",
    "wav", "audio/x-wav",
    "webp", "image/webp",
    "webm", "video/webm",
    "xml", "application/xml",
    "zip", "application/zip",
    "zst", "application/zstd",
};
 757 
 758 const char* adapt_fallback(char* name) {
 759     for (size_t i = 0; i < sizeof(fallback_aliases) / sizeof(char*); i += 2) {
 760         if (strcmp(name, fallback_aliases[i]) == 0) {
 761             return fallback_aliases[i + 1];
 762         }
 763     }
 764     return name;
 765 }
 766 
 767 // run returns the number of errors
 768 size_t run(int argc, char** argv) {
 769     // handle special cmd-line options
 770     for (size_t i = 1; i < argc; i++) {
 771         if (is_help_option(argv[i])) {
 772             // help option is handled right away, also quitting the app
 773             puts(info);
 774             return 0;
 775         }
 776     }
 777 
 778     bufwriter w;
 779     unsigned char buf[48 * 1024];
 780     init_bufwriter(&w, stdout, buf, sizeof(buf));
 781 
 782     size_t files = 0;
 783     size_t errors = 0;
 784     bool change_fallback = false;
 785 
 786     // handle all filenames given
 787     for (size_t i = 1; i < argc && !w.done; i++) {
 788         if (change_fallback) {
 789             fallback_mime_type = adapt_fallback(argv[i]);
 790             change_fallback = false;
 791             continue;
 792         }
 793 
 794         if (is_fallback_option(argv[i])) {
 795             change_fallback = true;
 796             continue;
 797         }
 798 
 799         if (!handle_file(&w, argv[i])) {
 800             errors++;
 801         }
 802         files++;
 803     }
 804 
 805     if (change_fallback) {
 806         fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n");
 807         errors++;
 808     }
 809 
 810     // no filenames means use stdin as the only input
 811     if (files == 0) {
 812         if (!handle_reader(&w, stdin, stdin_name)) {
 813             errors++;
 814         }
 815     }
 816 
 817     return errors;
 818 }
 819 
 820 int main(int argc, char** argv) {
 821 #ifdef _WIN32
 822     setmode(fileno(stdin), O_BINARY);
 823     // ensure output lines end in LF instead of CRLF on windows
 824     setmode(fileno(stdout), O_BINARY);
 825     setmode(fileno(stderr), O_BINARY);
 826 #endif
 827 
 828     // disable automatic stdio buffering, in favor of explicit buffering
 829     setvbuf(stdin, NULL, _IONBF, 0);
 830     setvbuf(stdout, NULL, _IONBF, 0);
 831     setvbuf(stderr, NULL, _IONBF, 0);
 832 
 833     // fill entries in the type-detect dispatch table
 834     hdr_dispatch[0] = hdr_dispatch_0; // 0
 835     hdr_dispatch[26] = hdr_dispatch_26; // 26
 836     hdr_dispatch[31] = hdr_dispatch_31; // 31
 837     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 838     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 839     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 840     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 841     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 842     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 843     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 844     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 845     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 846     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 847     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 848     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 849     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 850     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 851     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 852     hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 853     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 854     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 855     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 856     hdr_dispatch[127] = hdr_dispatch_127; // 127
 857     hdr_dispatch[137] = hdr_dispatch_137; // 137
 858     hdr_dispatch[255] = hdr_dispatch_255; // 255
 859 
 860     return run(argc, argv) == 0 ? 0 : 1;
 861 }