File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./datauri ./datauri.c
  29 */
  30 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif
  39 
  40 const char* info =
  41     ""
  42     "datauri [options...] [filenames...]\n"
  43     "\n"
  44     "\n"
  45     "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
  46     "few bytes from each data/file stream. When given multiple inputs, the output\n"
  47     "will be multiple lines, one for each file given.\n"
  48     "\n"
  49     "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
  50     "standard-input, which is also used automatically when not given any files.\n"
  51     "\n"
  52     "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
  53     "include their payload's MIME-type, and which are directly useable/shareable\n"
  54     "in web-browsers as links, despite not looking like normal links/URIs.\n"
  55     "\n"
  56     "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
  57     "\n"
  58     "\n"
  59     "Options\n"
  60     "\n"
  61     "    -h, -help, --h, --help              show this help message\n"
  62     "    -f, -fallback, --f, --fallback      change the fallback MIME type\n"
  63     "";
  64 
  65 const char* stdin_name = "<stdin>";
  66 
  67 #define octet "application/octet-stream"
  68 
  69 const char* fallback_mime_type = octet;
  70 
  71 // EMIT_CONST abstracts emitting string constants without their final null byte
  72 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  73 
  74 void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  75     fwrite(src, len, 1, w);
  76 }
  77 
  78 // can be anything: ensure this value differs from all other literal bytes
  79 // in the generic-headers table: failing that, its value could cause subtle
  80 // type-misdetection bugs; the value is chosen to be `obviously` findable
  81 // in the source, which also implies a constant beyond the ascii range, as
  82 // ascii char-constants are also used in the tables
  83 const unsigned char cba = 0xfd; // 253
  84 
  85 #define aiff "audio/aiff"
  86 #define au "audio/basic"
  87 #define avi "video/avi"
  88 #define avif "image/avif"
  89 #define bmp "image/x-bmp"
  90 #define caf "audio/x-caf"
  91 #define cur "image/vnd.microsoft.icon"
  92 #define css "text/css"
  93 #define csv "text/csv"
  94 #define djvu "image/x-djvu"
  95 #define elf "application/x-elf"
  96 #define exe "application/vnd.microsoft.portable-executable"
  97 #define flac "audio/x-flac"
  98 #define gif "image/gif"
  99 #define gz "application/gzip"
 100 #define heic "image/heic"
 101 #define htm "text/html"
 102 #define html "text/html"
 103 #define ico "image/x-icon"
 104 #define iso "application/octet-stream"
 105 #define jpeg "image/jpeg"
 106 #define js "application/javascript"
 107 #define json "application/json"
 108 #define m4a "audio/aac"
 109 #define m4v "video/x-m4v"
 110 #define midi "audio/midi"
 111 #define mov "video/quicktime"
 112 #define mp4 "video/mp4"
 113 #define mp3 "audio/mpeg"
 114 #define mpg "video/mpeg"
 115 #define ogg "audio/ogg"
 116 #define opus "audio/opus"
 117 #define pdf "application/pdf"
 118 #define png "image/png"
 119 #define ps "application/postscript"
 120 #define psd "image/vnd.adobe.photoshop"
 121 #define rtf "application/rtf"
 122 #define sqlite3 "application/x-sqlite3"
 123 #define svg "image/svg+xml"
 124 #define text "text/plain"
 125 #define tiff "image/tiff"
 126 #define tsv "text/tsv"
 127 #define utf8 "text/plain; charset=UTF-8"
 128 #define wasm "application/wasm"
 129 #define wav "audio/x-wav"
 130 #define webp "image/webp"
 131 #define webm "video/webm"
 132 #define xml "application/xml"
 133 #define zip "application/zip"
 134 #define zst "application/zstd"
 135 
 136 // format_descriptor ties a file-header pattern to its data-format type;
 137 // the 15-byte header-limit nicely aligns with the 1-byte length before it
 138 typedef struct format_descriptor {
 139     unsigned char header_length;
 140     unsigned char header_bytes[15];
 141     const char* mime;
 142 } format_descriptor;
 143 
 144 // starts_as tries to match header data to the pattern given: this includes
 145 // allowing `any byte` when the pattern indicates so, using a value reserved
 146 // for that purpose
 147 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
 148     // when header data aren't enough for a pattern, there's no match
 149     if (xlen < ylen) {
 150         return false;
 151     }
 152 
 153     for (size_t i = 0; i < ylen; i++) {
 154         if (y[i] == cba) {
 155             // `can be anything` value always matches
 156             continue;
 157         }
 158 
 159         if (x[i] != y[i]) {
 160             return false;
 161         }
 162     }
 163 
 164     return true;
 165 }
 166 
 167 // not confident enough to actually use this, and replace all table entries
 168 #define start_format_descriptor(...) \
 169     sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
 170     { __VA_ARGS__ }
 171 
 172 // format markers with leading wildcards, which should be checked before the
 173 // normal ones: this is to prevent mismatches with the latter types, even
 174 // though you can make probabilistic arguments which suggest these mismatches
 175 // should be very unlikely in practice
 176 format_descriptor special_headers[] = {
 177     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
 178     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
 179     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
 180     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
 181     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
 182     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
 183     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
 184     {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
 185     // {
 186     //     24,
 187     //     {
 188     //         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
 189     //         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
 190     //     },
 191     //     m4a,
 192     // },
 193     {0},
 194 };
 195 
 196 // check_m4a_dash handles the only special-header which exceeds 15 bytes
 197 bool check_m4a_dash(const uint8_t* x, size_t xlen) {
 198     const unsigned char header[24] = {
 199         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
 200         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
 201     };
 202     return starts_as(x, xlen, header, sizeof(header));
 203 }
 204 
 205 format_descriptor hdr_dispatch_0[] = {
 206     {4, {000, 000, 001, 0xBA}, mpg},
 207     {4, {000, 000, 001, 0xB3}, mpg},
 208     {4, {000, 000, 001, 000}, ico},
 209     {4, {000, 000, 002, 000}, cur},
 210     {4, {000, 'a', 's', 'm'}, wasm},
 211     {0},
 212 };
 213 
 214 format_descriptor hdr_dispatch_26[] = {
 215     {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
 216     {0},
 217 };
 218 
 219 format_descriptor hdr_dispatch_31[] = {
 220     // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
 221     {3, {0x1F, 0x8B, 0x08}, gz},
 222     {0},
 223 };
 224 
 225 format_descriptor hdr_dispatch_35[] = {
 226     {3, "#! ", text},
 227     {3, "#!/", text},
 228     {0},
 229 };
 230 
 231 format_descriptor hdr_dispatch_37[] = {
 232     {4, "%PDF", pdf},
 233     {4, "%!PS", ps},
 234     {0},
 235 };
 236 
 237 format_descriptor hdr_dispatch_40[] = {
 238     {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
 239     {0},
 240 };
 241 
 242 format_descriptor hdr_dispatch_46[] = {
 243     {4, ".snd", au},
 244     {0},
 245 };
 246 
 247 format_descriptor hdr_dispatch_56[] = {
 248     {4, "8BPS", psd},
 249     {0},
 250 };
 251 
 252 format_descriptor hdr_dispatch_60[] = {
 253     {14, "<!DOCTYPE html", html},
 254     {4, "<svg", svg},
 255     {5, "<html", html},
 256     {5, "<head", html},
 257     {5, "<body", html},
 258     {5, "<?xml", xml},
 259     {0},
 260 };
 261 
 262 format_descriptor hdr_dispatch_65[] = {
 263     {
 264         15,
 265         {
 266             'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
 267             cba, cba, cba, cba, 'D', 'J', 'V',
 268         },
 269         djvu,
 270     },
 271     {0},
 272 };
 273 
 274 format_descriptor hdr_dispatch_66[] = {
 275     {
 276         15,
 277         {
 278             'B', 'M', cba, cba, cba, cba, cba, cba,
 279             cba, cba, cba, cba, cba, cba, 0x28,
 280         },
 281         bmp,
 282     },
 283     {0},
 284 };
 285 
 286 format_descriptor hdr_dispatch_70[] = {
 287     {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
 288     {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
 289     {0},
 290 };
 291 
 292 format_descriptor hdr_dispatch_71[] = {
 293     {6, "GIF87a", gif},
 294     {6, "GIF89a", gif},
 295     {0},
 296 };
 297 
 298 format_descriptor hdr_dispatch_73[] = {
 299     {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
 300     {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
 301     {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
 302     {4, {'I', 'I', '*', 000}, tiff},
 303     {0},
 304 };
 305 
 306 format_descriptor hdr_dispatch_77[] = {
 307     {4, {'M', 'M', 000, '*'}, tiff},
 308     {4, "MThd", midi},
 309     {6, {'M', 'Z', cba, 000, cba, 000}, exe},
 310     // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
 311     // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
 312     // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
 313     {0},
 314 };
 315 
 316 format_descriptor hdr_dispatch_79[] = {
 317     {4, "OggS", ogg},
 318     {0},
 319 };
 320 
 321 format_descriptor hdr_dispatch_80[] = {
 322     {4, {'P', 'K', 003, 004}, zip},
 323     {0},
 324 };
 325 
 326 format_descriptor hdr_dispatch_82[] = {
 327     {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
 328     {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
 329     {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
 330     {0},
 331 };
 332 
 333 format_descriptor hdr_dispatch_83[] = {
 334     // {16, "SQLite format 3\x00", sqlite3},
 335     {0},
 336 };
 337 
 338 format_descriptor hdr_dispatch_99[] = {
 339     {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
 340     {0},
 341 };
 342 
 343 format_descriptor hdr_dispatch_102[] = {
 344     {4, "fLaC", flac},
 345     {0},
 346 };
 347 
 348 format_descriptor hdr_dispatch_123[] = {
 349     {4, "{\\rtf", rtf},
 350     {0},
 351 };
 352 
 353 format_descriptor hdr_dispatch_127[] = {
 354     {4, {127, 'E', 'L', 'F'}, elf},
 355     {0},
 356 };
 357 
 358 format_descriptor hdr_dispatch_137[] = {
 359     {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
 360     {0},
 361 };
 362 
 363 format_descriptor hdr_dispatch_255[] = {
 364     {3, {0xFF, 0xD8, 0xFF}, jpeg},
 365     {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
 366     {2, {0xFF, 0xFB}, mp3},
 367     {0},
 368 };
 369 
 370 // hdr_dispatch groups format-description-groups by their first byte, thus
 371 // shortening total lookups for some data header
 372 //
 373 // notice how the `ftyp` data formats aren't handled here, since these can
 374 // start with any byte, instead of the literal value of the any-byte markers
 375 // they use
 376 //
 377 // all entries are arrays which must always end with a special entry whose
 378 // pattern-length is declared to be 0, since there's no explicit way to know
 379 // the length of these arrays when looping on them
 380 //
 381 // all non-null entries are setup explicitly, later in the code
 382 format_descriptor* hdr_dispatch[256] = {
 383     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 384     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 385     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 386     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 387     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 388     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 389     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 390     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 391     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 392     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 393     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 394     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 395     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 396     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 397     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 398     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 399     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 400     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 401     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 402     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 403     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 404     NULL, NULL, NULL, NULL,
 405 };
 406 
 407 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 408 // using the lookup-tables
 409 const char* guess_mime(const unsigned char* buf, size_t len) {
 410     if (len == 0) {
 411         return NULL;
 412     }
 413 
 414     // try the patterns which allow any bytes at the very start
 415     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 416         const unsigned char* hb = special_headers[i].header_bytes;
 417         const size_t hl = special_headers[i].header_length;
 418         if (starts_as(buf, len, hb, hl)) {
 419             return special_headers[i].mime;
 420         }
 421     }
 422 
 423     // the m4a-dash header exceeds the 15-byte limit of the lookup tables
 424     if (check_m4a_dash(buf, len)) {
 425         return m4a;
 426     }
 427 
 428     format_descriptor* guesses = hdr_dispatch[buf[0]];
 429     if (guesses == NULL) {
 430         return fallback_mime_type;
 431     }
 432 
 433     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 434         const unsigned char* hb = guesses[i].header_bytes;
 435         const size_t hl = guesses[i].header_length;
 436         if (starts_as(buf, len, hb, hl)) {
 437             return guesses[i].mime;
 438         }
 439     }
 440 
 441     // the sqlite3 header exceeds the 15-byte limit of the lookup tables
 442     const char header[16] = "SQLite format 3\x00";
 443     if (starts_as(buf, len, (uint8_t*)header, sizeof(header))) {
 444         return sqlite3;
 445     }
 446 
 447     return fallback_mime_type;
 448 }
 449 
 450 bool is_mime_disabled(const char* mime) {
 451     return (mime != NULL) && (mime[0] == 'n') && (
 452         strcmp(mime, "no") == 0 ||
 453         strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 ||
 454         strcmp(mime, "none") == 0 || strcmp(mime, "not") == 0
 455     );
 456 }
 457 
 458 // start_data_uri starts the output by declaring the data-URI to be an
 459 // auto-detected MIME-type; the return value is the auto-detection success
 460 bool start_data_uri(FILE* w, const unsigned char* buf, size_t len) {
 461     const char* mime = guess_mime(buf, len);
 462     if (is_mime_disabled(mime)) {
 463         return true;
 464     }
 465     if (mime == NULL || mime[0] == 0) {
 466         return false;
 467     }
 468 
 469     EMIT_CONST(w, "data:");
 470     for (size_t i = 0; mime[i] != 0; i++) {
 471         putc(mime[i], w);
 472     }
 473     EMIT_CONST(w, ";base64,");
 474     return true;
 475 }
 476 
 477 const unsigned char base64_lookup[64] =
 478     ""
 479     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 480     "";
 481 
 482 uint32_t combine_triple(const unsigned char data[4]) {
 483     return (data[0] << 16) | (data[1] << 8) | (data[2] << 0);
 484 }
 485 
 486 void emit_triple(FILE* w, uint32_t v) {
 487     putc(base64_lookup[0x3f & (v >> 18)], w);
 488     putc(base64_lookup[0x3f & (v >> 12)], w);
 489     putc(base64_lookup[0x3f & (v >> 6)], w);
 490     putc(base64_lookup[0x3f & (v >> 0)], w);
 491 }
 492 
 493 void emit_couple(FILE* w, uint32_t v) {
 494     putc(base64_lookup[0x3f & (v >> 18)], w);
 495     putc(base64_lookup[0x3f & (v >> 12)], w);
 496     putc(base64_lookup[0x3f & (v >> 6)], w);
 497     putc('=', w);
 498 }
 499 
 500 void emit_single(FILE* w, uint32_t v) {
 501     putc(base64_lookup[0x3f & (v >> 18)], w);
 502     putc(base64_lookup[0x3f & (v >> 12)], w);
 503     putc('=', w);
 504     putc('=', w);
 505 }
 506 
 507 bool handle_reader(FILE* w, FILE* src, const char* path) {
 508     unsigned char buf[32 * 1024];
 509     uint64_t bytes = 0;
 510 
 511     // triple holds groups of 3 bytes at once, which is required by base64,
 512     // except for the last few bytes of input, which are padded with equals;
 513     // the 4th item is never used, but having it aligns things to 32 bits
 514     unsigned char triple[4];
 515 
 516     triple[0] = 0;
 517     triple[1] = 0;
 518     triple[2] = 0;
 519     triple[3] = 0;
 520 
 521     while (!feof(w)) {
 522         const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 523         if (n < 1) {
 524             // assume input is over when no bytes were read
 525             break;
 526         }
 527 
 528         if (bytes == 0 && !start_data_uri(w, buf, n)) {
 529             putc('\n', w);
 530             const char* msg = "can't auto-detect MIME type for";
 531             fprintf(stderr, "\x1b[31m%s %s\x1b[0m\n", msg, path);
 532             return false;
 533         }
 534 
 535         size_t where = bytes % 3;
 536         for (size_t i = 0; i < n; i++, bytes++) {
 537             triple[where++] = buf[i];
 538             if (where == 3) {
 539                 emit_triple(w, combine_triple(triple));
 540                 where = 0;
 541             }
 542         }
 543     }
 544 
 545     // empty inputs result in empty outputs
 546     if (bytes == 0) {
 547         return true;
 548     }
 549 
 550     // don't forget unemitted trailing bytes, if any: these need special
 551     // handling, as they include `=` signs; if the input bytes were a
 552     // multiple of 3, there won't be any trailing bytes
 553     switch (bytes % 3) {
 554         case 1:
 555             triple[1] = 0;
 556             triple[2] = 0;
 557             emit_single(w, combine_triple(triple));
 558             break;
 559         case 2:
 560             triple[2] = 0;
 561             emit_couple(w, combine_triple(triple));
 562             break;
 563     }
 564 
 565     // end with a line-feed, so multiple input streams are each encoded in
 566     // their own line
 567     if (bytes > 0) {
 568         putc('\n', w);
 569     }
 570     return true;
 571 }
 572 
 573 // handle_file handles data from the filename given; returns false only when
 574 // an error happened
 575 bool handle_file(FILE* w, const char* path) {
 576     // a `-` filename stands for the standard input
 577     if (path[0] == '-' && path[1] == 0) {
 578         return handle_reader(w, stdin, stdin_name);
 579     }
 580 
 581     FILE* f = fopen(path, "rb");
 582     if (f == NULL) {
 583         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 584         return false;
 585     }
 586 
 587     const bool ok = handle_reader(w, f, path);
 588     fclose(f);
 589     return ok;
 590 }
 591 
 592 // is_help_option simplifies control-flow for func run
 593 bool is_help_option(const char* s) {
 594     return s[0] == '-' && (
 595         strcmp(s, "-h") == 0 ||
 596         strcmp(s, "-help") == 0 ||
 597         strcmp(s, "--h") == 0 ||
 598         strcmp(s, "--help") == 0
 599     );
 600 }
 601 
 602 // is_fallback_option simplifies control-flow for func run
 603 bool is_fallback_option(const char* s) {
 604     return s[0] == '-' && (
 605            strcmp(s, "-f") == 0 ||
 606            strcmp(s, "-fallback") == 0 ||
 607            strcmp(s, "--f") == 0 ||
 608            strcmp(s, "--fallback") == 0
 609     );
 610 }
 611 
 612 const char* fallback_aliases[192] = {
 613     // tiny shortcuts
 614     "b", octet,
 615     "j", json,
 616     "t", text,
 617     "u", utf8,
 618 
 619     // failure fallbacks
 620     "e", "",
 621     "err", "",
 622     "error", "",
 623     "f", "",
 624     "fail", "",
 625 
 626     // common mistakes
 627     "text/json", json,
 628 
 629     "aif", aiff,
 630     "aiff", aiff,
 631     "au", au,
 632     "avi", avi,
 633     "avif", avif,
 634     "bmp", bmp,
 635     "caf", caf,
 636     "cur", cur,
 637     "css", css,
 638     "csv", csv,
 639     "djvu", djvu,
 640     "elf", elf,
 641     "exe", exe,
 642     "flac", flac,
 643     "gif", gif,
 644     "gz", gz,
 645     "heic", heic,
 646     "html", html,
 647     "ico", ico,
 648     "iso", iso,
 649     "jpg", jpeg,
 650     "jpeg", jpeg,
 651     "js", js,
 652     "json", json,
 653     "m4a", m4a,
 654     "m4v", m4v,
 655     "midi", midi,
 656     "mov", mov,
 657     "mp4", mp4,
 658     "mp3", mp3,
 659     "mpeg", mpg,
 660     "ogg", ogg,
 661     "opus", opus,
 662     "pdf", pdf,
 663     "png", png,
 664     "ps", ps,
 665     "psd", psd,
 666     "rtf", rtf,
 667     "sqlite3", sqlite3,
 668     "svg", svg,
 669     "text", text,
 670     "tiff", tiff,
 671     "tsv", tsv,
 672     "wasm", wasm,
 673     "wav", wav,
 674     "webp", webp,
 675     "webm", webm,
 676     "xml", xml,
 677     "zip", zip,
 678     "zst", zst,
 679 
 680     // longer shortcuts
 681     "aac", m4a,
 682     "aif", aiff,
 683     "bin", octet,
 684     "binary", octet,
 685     "bits", octet,
 686     "gzip", gz,
 687     "htm", htm,
 688     "mid", midi,
 689     "mpg", mpg,
 690     "octet", octet,
 691     "octets", octet,
 692     "octetstream", octet,
 693     "octet-stream", octet,
 694     "plain", text,
 695     "sqlite", sqlite3,
 696     "svg+xml", svg,
 697     "tif", tiff,
 698     "utf8", utf8,
 699     "utf-8", utf8,
 700     "xbmp", bmp,
 701     "xcaf", caf,
 702     "xflac", flac,
 703     "xicon", ico,
 704     "xm4v", m4v,
 705     "xsqlite3", sqlite3,
 706     "xwav", wav,
 707     "xwave", wav,
 708     "x-bmp", bmp,
 709     "x-caf", caf,
 710     "x-flac", flac,
 711     "x-icon", ico,
 712     "x-m4v", m4v,
 713     "x-sqlite3", sqlite3,
 714     "x-wav", wav,
 715     "wave", wav,
 716     "zstd", zst,
 717 };
 718 
 719 const char* resolve_alias(const char* name) {
 720     const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
 721     for (size_t i = 0; i < n; i += 2) {
 722         if (strcmp(name, fallback_aliases[i]) == 0) {
 723             return fallback_aliases[i + 1];
 724         }
 725     }
 726     return name;
 727 }
 728 
 729 // run returns the number of errors
 730 int run(int argc, char** argv, FILE* w) {
 731     size_t files = 0;
 732     size_t errors = 0;
 733     bool change_fallback = false;
 734 
 735     // handle all filenames given
 736     for (size_t i = 1; i < argc && !feof(w); i++) {
 737         if (change_fallback) {
 738             fallback_mime_type = resolve_alias(argv[i]);
 739             change_fallback = false;
 740             continue;
 741         }
 742 
 743         if (is_fallback_option(argv[i])) {
 744             change_fallback = true;
 745             continue;
 746         }
 747 
 748         if (!handle_file(w, argv[i])) {
 749             errors++;
 750         }
 751         files++;
 752     }
 753 
 754     if (change_fallback) {
 755         fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n");
 756         errors++;
 757         return errors;
 758     }
 759 
 760     // no filenames means use stdin as the only input
 761     if (files == 0) {
 762         if (!handle_reader(w, stdin, stdin_name)) {
 763             errors++;
 764         }
 765     }
 766 
 767     return errors;
 768 }
 769 
 770 int main(int argc, char** argv) {
 771 #ifdef _WIN32
 772     setmode(fileno(stdin), O_BINARY);
 773     // ensure output lines end in LF instead of CRLF on windows
 774     setmode(fileno(stdout), O_BINARY);
 775     setmode(fileno(stderr), O_BINARY);
 776 #endif
 777 
 778     if (argc > 1 && is_help_option(argv[1])) {
 779         puts(info);
 780         return 0;
 781     }
 782 
 783     // fill entries in the type-detection dispatch table
 784     hdr_dispatch[0] = hdr_dispatch_0; // 0
 785     hdr_dispatch[26] = hdr_dispatch_26; // 26
 786     hdr_dispatch[31] = hdr_dispatch_31; // 31
 787     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 788     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 789     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 790     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 791     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 792     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 793     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 794     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 795     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 796     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 797     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 798     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 799     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 800     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 801     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 802     hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 803     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 804     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 805     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 806     hdr_dispatch[127] = hdr_dispatch_127; // 127
 807     hdr_dispatch[137] = hdr_dispatch_137; // 137
 808     hdr_dispatch[255] = hdr_dispatch_255; // 255
 809 
 810     return run(argc, argv, stdout) == 0 ? 0 : 1;
 811 }