File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./datauri ./datauri.c
  29 */
  30 
  31 #include <fcntl.h>
  32 #include <math.h>
  33 #include <stdbool.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <windows.h>
  41 #endif
  42 
// info is the help message: func run prints it, then quits right away, when
// any of the command-line arguments is one of the help options
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    "    -h, -help, --h, --help              show this help message\n"
    "    -f, -fallback, --f, --fallback      change the fallback MIME type\n"
    "";
  67 
// stdin_name is the input name shown in error messages when the data come
// from the standard input, instead of from a named file
const char* stdin_name = "<stdin>";

// fallback_mime_type is the MIME type func guess_mime gives when no header
// pattern matches; func run replaces it when given the fallback option
const char* fallback_mime_type = "application/octet-stream";
  71 
  72 // EMIT_CONST abstracts emitting string constants without their final null byte
  73 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  74 
  75 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  76     fwrite(src, len, 1, w);
  77 }
  78 
  79 // can be anything: ensure this value differs from all other literal bytes
  80 // in the generic-headers table: failing that, its value could cause subtle
  81 // type-misdetection bugs; the value is chosen to be `obviously` findable
  82 // in the source, which also implies a constant beyond the ascii range, as
  83 // ascii char-constants are also used in the tables
  84 const unsigned char cba = 0xfd; // 253
  85 
// MIME-type strings, named after their usual filename extensions: the header
// tables use these names for their MIME-type fields
#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpg "image/jpeg"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define mid "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"
 136 
// format_descriptor ties a file-header pattern to its data-format type; the
// tables using it are initialized positionally, so field order matters
typedef struct format_descriptor {
    // how many leading items in header_bytes make up the pattern; a value
    // of 0 marks the end of a table of descriptors
    unsigned char header_length;
    // the pattern itself: items with the `can be anything` value match any
    // input byte, all others must match exactly
    unsigned char header_bytes[24];
    // the MIME type to use when the pattern matches
    const char* mime;
} format_descriptor;
 143 
 144 // starts_as tries to match header data to the pattern given: this includes
 145 // allowing `any byte` when the pattern indicates so, using a value reserved
 146 // for that purpose
 147 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
 148     // when header data aren't enough for a pattern, there's no match
 149     if (xlen < ylen) {
 150         return false;
 151     }
 152 
 153     for (size_t i = 0; i < xlen; i++) {
 154         if (y[i] == cba) {
 155             // `can be anything` value always matches
 156             continue;
 157         }
 158 
 159         if (x[i] != y[i]) {
 160             return false;
 161         }
 162     }
 163 
 164     return true;
 165 }
 166 
 167 // wrapper func to make func `starts_as` harder to miscall
 168 inline bool match_header(unsigned char* d, size_t len, format_descriptor* to) {
 169     return starts_as(d, len, to->header_bytes, to->header_length);
 170 }
 171 
// start_format_descriptor expands into the first 2 fields of a descriptor,
// counting the pattern bytes given instead of declaring their count by hand
//
// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }
 176 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
//
// all these patterns skip the first 4 bytes, then expect `ftyp` followed by
// a format-specific tag; the table ends with a zero-length entry
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    {
        24,
        {
            cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
            000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
        },
        m4a,
    },
    {0},
};
 200 
// the tables which follow group header patterns by their first byte, whose
// decimal value is the suffix in their names; each table ends with a
// zero-length entry, and entries are tried in the order given

// headers starting with a null byte
format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

// headers starting with byte 0x1A
format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

// headers starting with byte 0x1F
format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

// headers starting with `#`
format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

// headers starting with `%`
format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

// headers starting with `(`
format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

// headers starting with `.`
format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

// headers starting with `8`
format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

// headers starting with `<`
format_descriptor hdr_dispatch_60[] = {
    {14, "<!DOCTYPE html", html},
    {4, "<svg", svg},
    {5, "<html", html},
    {5, "<head", html},
    {5, "<body", html},
    {5, "<?xml", xml},
    {0},
};

// headers starting with `A`
format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

// headers starting with `B`
format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

// headers starting with `F`
format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

// headers starting with `G`
format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

// headers starting with `I`
format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

// headers starting with `M`
format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", mid},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

// headers starting with `O`
format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

// headers starting with `P`
format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

// headers starting with `R`
format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

// headers starting with `S`; the pattern includes the null byte after the 3
format_descriptor hdr_dispatch_83[] = {
    {16, "SQLite format 3\x00", sqlite3},
    {0},
};

// headers starting with `c`
format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

// headers starting with `f`
format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};
 343 
 344 format_descriptor hdr_dispatch_123[] = {
 345     {4, "{\\rtf", rtf},
 346     {0},
 347 };
 348 
// headers starting with byte 127
format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

// headers starting with byte 0x89
format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

// headers starting with byte 0xFF; entries are tried in the order given
format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
 365 
 366 // hdr_dispatch groups format-description-groups by their first byte, thus
 367 // shortening total lookups for some data header
 368 //
 369 // notice how the `ftyp` data formats aren't handled here, since these can
 370 // start with any byte, instead of the literal value of the any-byte markers
 371 // they use
 372 //
 373 // all entries are arrays which must always end with a special entry whose
 374 // pattern-length is declared to be 0, since there's no explicit way to know
 375 // the length of these arrays when looping on them
 376 //
 377 // all non-null entries are setup explicitly, later in the code
 378 format_descriptor* hdr_dispatch[256] = {
 379     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 380     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 381     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 382     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 383     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 384     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 385     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 386     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 387     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 388     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 389     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 390     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 391     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 392     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 393     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 394     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 395     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 396     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 397     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 398     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 399     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 400     NULL, NULL, NULL, NULL,
 401 };
 402 
 403 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 404 // using the lookup-tables
 405 const char* guess_mime(unsigned char* buf, size_t len) {
 406     if (len == 0) {
 407         return NULL;
 408     }
 409 
 410     // try the patterns which allow any bytes at the very start
 411     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 412         if (match_header(buf, len, &special_headers[i])) {
 413             return special_headers[i].mime;
 414         }
 415     }
 416 
 417     format_descriptor* guesses = hdr_dispatch[buf[0]];
 418     if (guesses == NULL) {
 419         return fallback_mime_type;
 420     }
 421 
 422     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 423         if (match_header(buf, len, &guesses[i])) {
 424             return guesses[i].mime;
 425         }
 426     }
 427     return fallback_mime_type;
 428 }
 429 
 430 bool is_mime_disabled(const char* mime) {
 431     return (mime != NULL) && (mime[0] == 'n') && (
 432         strcmp(mime, "no") == 0 ||
 433         strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 ||
 434         strcmp(mime, "none") == 0 || strcmp(mime, "not") == 0
 435     );
 436 }
 437 
 438 // start_data_uri starts the output by declaring the data-URI to be an
 439 // auto-detected MIME-type; the return value is the auto-detection success
 440 bool start_data_uri(FILE* w, unsigned char* buf, size_t len) {
 441     const char* mime = guess_mime(buf, len);
 442     if (is_mime_disabled(mime)) {
 443         return true;
 444     }
 445     if (mime == NULL || mime[0] == 0) {
 446         return false;
 447     }
 448 
 449     EMIT_CONST(w, "data:");
 450     for (size_t i = 0; mime[i] != 0; i++) {
 451         putc(mime[i], w);
 452     }
 453     EMIT_CONST(w, ";base64,");
 454     return true;
 455 }
 456 
// base64_lookup turns 6-bit values (0 to 63) into their base64 symbols
const unsigned char base64_lookup[] =
    ""
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    "";
 461 
 462 inline uint32_t combine_triple(const unsigned char data[4]) {
 463     return (data[0] << 16) | (data[1] << 8) | data[2];
 464 }
 465 
 466 inline void emit_triple(FILE* w, uint32_t v) {
 467     putc(base64_lookup[0x3f & (v >> 18)], w);
 468     putc(base64_lookup[0x3f & (v >> 12)], w);
 469     putc(base64_lookup[0x3f & (v >> 6)], w);
 470     putc(base64_lookup[0x3f & v], w);
 471 }
 472 
 473 inline void emit_couple(FILE* w, uint32_t v) {
 474     putc(base64_lookup[0x3f & (v >> 18)], w);
 475     putc(base64_lookup[0x3f & (v >> 12)], w);
 476     putc(base64_lookup[0x3f & (v >> 6)], w);
 477     putc('=', w);
 478 }
 479 
 480 inline void emit_single(FILE* w, uint32_t v) {
 481     putc(base64_lookup[0x3f & (v >> 18)], w);
 482     putc(base64_lookup[0x3f & (v >> 12)], w);
 483     putc('=', w);
 484     putc('=', w);
 485 }
 486 
 487 bool handle_reader(FILE* w, FILE* src, const char* path) {
 488     unsigned char buf[32 * 1024];
 489     size_t chunks = 0;
 490     size_t where = 0;
 491 
 492     // triple holds groups of 3 bytes at once, which is required by base64,
 493     // except for the last few bytes of input, which are padded with equals;
 494     // the 4th item is never used, but having it aligns things to 32 bits
 495     unsigned char triple[4];
 496 
 497     triple[0] = 0;
 498     triple[1] = 0;
 499     triple[2] = 0;
 500     triple[3] = 0;
 501 
 502     while (!feof(w)) {
 503         const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 504         if (n < 1) {
 505             // assume input is over when no bytes were read
 506             break;
 507         }
 508 
 509         if (chunks == 0 && !start_data_uri(w, buf, n)) {
 510             char* fmt = "\x1b[31mcan't auto-detect MIME type for %s\x1b[0m\n";
 511             putc('\n', w);
 512             fprintf(stderr, fmt, path);
 513             return false;
 514         }
 515         chunks++;
 516 
 517         for (size_t i = 0; i < n; i++) {
 518             triple[where] = buf[i];
 519             if (where < 2) {
 520                 where++;
 521             } else {
 522                 emit_triple(w, combine_triple(triple));
 523                 where = 0;
 524             }
 525         }
 526     }
 527 
 528     // empty inputs result in empty outputs
 529     if (chunks == 0) {
 530         return true;
 531     }
 532 
 533     // don't forget unemitted trailing bytes, if any: these need special
 534     // handling, as they include `=` signs; if the input bytes were a
 535     // multiple of 3, there won't be any trailing bytes
 536     switch (where) {
 537         case 1:
 538             triple[1] = 0;
 539             triple[2] = 0;
 540             emit_single(w, combine_triple(triple));
 541             break;
 542         case 2:
 543             triple[2] = 0;
 544             emit_couple(w, combine_triple(triple));
 545             break;
 546     }
 547 
 548     // end with a line-feed, so multiple input streams are each encoded in
 549     // their own line
 550     if (chunks > 0) {
 551         putc('\n', w);
 552     }
 553     return true;
 554 }
 555 
 556 // handle_file handles data from the filename given; returns false only when
 557 // an error happened
 558 bool handle_file(FILE* w, const char* path) {
 559     // a `-` filename stands for the standard input
 560     if (path[0] == '-' && path[1] == 0) {
 561         return handle_reader(w, stdin, stdin_name);
 562     }
 563 
 564     FILE* f = fopen(path, "rb");
 565     if (f == NULL) {
 566         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 567         return false;
 568     }
 569 
 570     const bool ok = handle_reader(w, f, path);
 571     fclose(f);
 572     return ok;
 573 }
 574 
 575 // is_help_option simplifies control-flow for func run
 576 bool is_help_option(char* s) {
 577     return s[0] == '-' && (
 578         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 579         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 580     );
 581 }
 582 
 583 // is_fallback_option simplifies control-flow for func run
 584 bool is_fallback_option(char* s) {
 585     return s[0] == '-' && (
 586            strcmp(s, "-f") == 0 || strcmp(s, "-fallback") == 0 ||
 587            strcmp(s, "--f") == 0 || strcmp(s, "--fallback") == 0
 588     );
 589 }
 590 
// fallback_aliases is a flat list of pairs: each shortcut name is followed
// right away by the MIME type it stands for, so the item-count must stay
// even; func resolve_alias uses the first pair whose name matches
//
// shortcuts paired to empty strings are ways to ask for a failure when the
// type can't be auto-detected, as empty MIME types make func start_data_uri
// report a failure
const char* fallback_aliases[] = {
    // "text/json", "application/json",

    // "xbmp", "image/x-bmp",
    // "xflac", "audio/x-flac",
    // "xicon", "image/x-icon",
    // "xm4v", "video/x-m4v",
    // "xsqlite3", "application/x-sqlite3",
    // "xwav", "audio/x-wav",
    // "xwave", "audio/x-wav",
    // "x-bmp", "image/x-bmp",
    // "x-flac", "audio/x-flac",
    // "x-icon", "image/x-icon",
    // "x-m4v", "video/x-m4v",
    // "x-sqlite3", "application/x-sqlite3",
    // "x-wav", "audio/x-wav",

    // single-letter shortcuts
    "b", "application/octet-stream",
    "j", "application/json",
    "t", "text/plain",
    "u", "text/plain; charset=UTF-8",

    // shortcuts to fail when auto-detection doesn't work
    "e", "",
    "err", "",
    "error", "",
    "f", "",
    "fail", "",

    // extra names, which aren't among the MIME-type macro names
    "aac", "audio/aac",
    "aif", "audio/aiff",
    "bin", "application/octet-stream",
    "binary", "application/octet-stream",
    "gzip", "application/gzip",
    "midi", "audio/midi",
    "mpeg", "video/mpeg",
    "octet", "application/octet-stream",
    "octetstream", "application/octet-stream",
    "octet-stream", "application/octet-stream",
    "plain", "text/plain",
    "sqlite", "application/x-sqlite3",
    "svg+xml", "image/svg+xml",
    "tif", "image/tiff",
    "utf8", "text/plain; charset=UTF-8",
    "utf-8", "text/plain; charset=UTF-8",
    "wave", "audio/x-wav",
    "zstd", "application/zstd",

    // names matching the MIME-type macro names
    "aiff", "audio/aiff",
    "au", "audio/basic",
    "avi", "video/avi",
    "avif", "image/avif",
    "bmp", "image/x-bmp",
    "caf", "audio/x-caf",
    "cur", "image/vnd.microsoft.icon",
    "css", "text/css",
    "csv", "text/csv",
    "djvu", "image/x-djvu",
    "elf", "application/x-elf",
    "exe", "application/vnd.microsoft.portable-executable",
    "flac", "audio/x-flac",
    "gif", "image/gif",
    "gz", "application/gzip",
    "heic", "image/heic",
    "htm", "text/html",
    "html", "text/html",
    "ico", "image/x-icon",
    "iso", "application/octet-stream",
    "jpg", "image/jpeg",
    "jpeg", "image/jpeg",
    "js", "application/javascript",
    "json", "application/json",
    "m4a", "audio/aac",
    "m4v", "video/x-m4v",
    "mid", "audio/midi",
    "mov", "video/quicktime",
    "mp4", "video/mp4",
    "mp3", "audio/mpeg",
    "mpg", "video/mpeg",
    "ogg", "audio/ogg",
    "opus", "audio/opus",
    "pdf", "application/pdf",
    "png", "image/png",
    "ps", "application/postscript",
    "psd", "image/vnd.adobe.photoshop",
    "rtf", "application/rtf",
    "sqlite3", "application/x-sqlite3",
    "svg", "image/svg+xml",
    "text", "text/plain",
    "tiff", "image/tiff",
    "tsv", "text/tsv",
    "wasm", "application/wasm",
    "wav", "audio/x-wav",
    "webp", "image/webp",
    "webm", "video/webm",
    "xml", "application/xml",
    "zip", "application/zip",
    "zst", "application/zstd",
};
 689 
 690 const char* resolve_alias(char* name) {
 691     const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
 692     for (size_t i = 0; i < n; i += 2) {
 693         if (strcmp(name, fallback_aliases[i]) == 0) {
 694             return fallback_aliases[i + 1];
 695         }
 696     }
 697     return name;
 698 }
 699 
 700 // run returns the number of errors
 701 int run(int argc, char** argv, FILE* w) {
 702     // handle special cmd-line options
 703     for (size_t i = 1; i < argc; i++) {
 704         if (is_help_option(argv[i])) {
 705             // help option is handled right away, also quitting the app
 706             puts(info);
 707             return 0;
 708         }
 709     }
 710 
 711     size_t files = 0;
 712     size_t errors = 0;
 713     bool change_fallback = false;
 714 
 715     // handle all filenames given
 716     for (size_t i = 1; i < argc && !feof(w); i++) {
 717         if (change_fallback) {
 718             fallback_mime_type = resolve_alias(argv[i]);
 719             change_fallback = false;
 720             continue;
 721         }
 722 
 723         if (is_fallback_option(argv[i])) {
 724             change_fallback = true;
 725             continue;
 726         }
 727 
 728         if (!handle_file(w, argv[i])) {
 729             errors++;
 730         }
 731         files++;
 732     }
 733 
 734     if (change_fallback) {
 735         fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n");
 736         errors++;
 737     }
 738 
 739     // no filenames means use stdin as the only input
 740     if (files == 0) {
 741         if (!handle_reader(w, stdin, stdin_name)) {
 742             errors++;
 743         }
 744     }
 745 
 746     return errors;
 747 }
 748 
 749 int main(int argc, char** argv) {
 750 #ifdef _WIN32
 751     setmode(fileno(stdin), O_BINARY);
 752     // ensure output lines end in LF instead of CRLF on windows
 753     setmode(fileno(stdout), O_BINARY);
 754     setmode(fileno(stderr), O_BINARY);
 755 #endif
 756 
 757     // fill entries in the type-detect dispatch table
 758     hdr_dispatch[0] = hdr_dispatch_0; // 0
 759     hdr_dispatch[26] = hdr_dispatch_26; // 26
 760     hdr_dispatch[31] = hdr_dispatch_31; // 31
 761     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 762     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 763     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 764     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 765     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 766     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 767     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 768     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 769     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 770     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 771     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 772     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 773     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 774     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 775     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 776     hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 777     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 778     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 779     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 780     hdr_dispatch[127] = hdr_dispatch_127; // 127
 781     hdr_dispatch[137] = hdr_dispatch_137; // 137
 782     hdr_dispatch[255] = hdr_dispatch_255; // 255
 783 
 784     return run(argc, argv, stdout) == 0 ? 0 : 1;
 785 }