File: datauri.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./datauri ./datauri.c
  29 */
  30 
  31 #include <fcntl.h>
  32 #include <math.h>
  33 #include <stdbool.h>
  34 #include <stdint.h>
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <string.h>
  38 
  39 #ifdef _WIN32
  40 #include <windows.h>
  41 #endif
  42 
// info is the help message: func `run` shows it as soon as any of the help
// options is found among the command-line arguments
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    "    -h, -help, --h, --help              show this help message\n"
    "    -f, -fallback, --f, --fallback      change the fallback MIME type\n"
    "";
  67 
// stdin_name is the name passed along with the standard input, so that
// error messages can refer to it
const char* stdin_name = "<stdin>";

// fallback_mime_type is what func `guess_mime` returns when no header
// pattern matches; func `run` reassigns it when the fallback option is used
const char* fallback_mime_type = "application/octet-stream";
  71 
  72 // EMIT_CONST abstracts emitting string constants without their final null byte
  73 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  74 
  75 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  76     fwrite(src, len, 1, w);
  77 }
  78 
  79 // can be anything: ensure this value differs from all other literal bytes
  80 // in the generic-headers table: failing that, its value could cause subtle
  81 // type-misdetection bugs; the value is chosen to be `obviously` findable
  82 // in the source, which also implies a constant beyond the ascii range, as
  83 // ascii char-constants are also used in the tables
  84 const unsigned char cba = 0xfd; // 253
  85 
// short names for the MIME types used in the header-pattern tables; being
// plain macros, these names are also replaced wherever they show up as
// identifiers in the rest of the source, so avoid reusing them for anything
// else
#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpg "image/jpeg"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define mid "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"
 136 
// format_descriptor ties a file-header pattern to its data-format type
typedef struct format_descriptor {
    // how many of the leading items in `header_bytes` make up the pattern;
    // tables of descriptors end with an entry where this is 0
    unsigned char header_length;
    // the pattern itself: items equal to `cba` stand for `any byte`
    unsigned char header_bytes[24];
    // the MIME type to use for data matching the pattern
    const char* mime;
} format_descriptor;
 143 
 144 // starts_as tries to match header data to the pattern given: this includes
 145 // allowing `any byte` when the pattern indicates so, using a value reserved
 146 // for that purpose
 147 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
 148     // when header data aren't enough for a pattern, there's no match
 149     if (xlen < ylen) {
 150         return false;
 151     }
 152 
 153     for (size_t i = 0; i < xlen; i++) {
 154         if (y[i] == cba) {
 155             // `can be anything` value always matches
 156             continue;
 157         }
 158 
 159         if (x[i] != y[i]) {
 160             return false;
 161         }
 162     }
 163 
 164     return true;
 165 }
 166 
 167 // wrapper func to make func `starts_as` harder to miscall
 168 inline bool match_header(unsigned char* d, size_t len, format_descriptor* to) {
 169     return starts_as(d, len, to->header_bytes, to->header_length);
 170 }
 171 
// start_format_descriptor expands into the first 2 initializer items for a
// format_descriptor: the count of the bytes given, followed by those same
// bytes in braces, which avoids counting pattern bytes by hand
//
// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }
 176 
// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
//
// all patterns here start with 4 `any byte` items, followed by `ftyp` and
// a format-specific tag; the table ends with a zero-length entry, which is
// how func `guess_mime` knows where to stop
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    {
        24,
        {
            cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
            000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
        },
        m4a,
    },
    {0},
};
 200 
// the tables which follow group header patterns by their first byte, whose
// decimal value is the number ending each table's name; each table ends
// with a zero-length entry, and each is hooked up to the `hdr_dispatch`
// lookup table by func `main`

// patterns starting with a null byte
format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

// patterns starting with byte 0x1A
format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

// patterns starting with byte 0x1F
format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

// patterns starting with `#`
format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

// patterns starting with `%`
format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

// patterns starting with `(`
format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

// patterns starting with `.`
format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

// patterns starting with `8`
format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

// patterns starting with `<`
format_descriptor hdr_dispatch_60[] = {
    {14, "<!DOCTYPE html", html},
    {4, "<svg", svg},
    {5, "<html", html},
    {5, "<head", html},
    {5, "<body", html},
    {5, "<?xml", xml},
    {0},
};

// patterns starting with `A`
format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

// patterns starting with `B`
format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

// patterns starting with `F`
format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

// patterns starting with `G`
format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

// patterns starting with `I`
format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

// patterns starting with `M`
format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", mid},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

// patterns starting with `O`
format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

// patterns starting with `P`
format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

// patterns starting with `R`
format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

// patterns starting with `S`
format_descriptor hdr_dispatch_83[] = {
    {16, "SQLite format 3\x00", sqlite3},
    {0},
};

// patterns starting with `c`
format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

// patterns starting with `f`
format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};
 343 
 344 format_descriptor hdr_dispatch_123[] = {
 345     {4, "{\\rtf", rtf},
 346     {0},
 347 };
 348 
// patterns starting with byte 127
format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

// patterns starting with byte 0x89
format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

// patterns starting with byte 0xFF
format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
 365 
 366 // hdr_dispatch groups format-description-groups by their first byte, thus
 367 // shortening total lookups for some data header
 368 //
 369 // notice how the `ftyp` data formats aren't handled here, since these can
 370 // start with any byte, instead of the literal value of the any-byte markers
 371 // they use
 372 //
 373 // all entries are arrays which must always end with a special entry whose
 374 // pattern-length is declared to be 0, since there's no explicit way to know
 375 // the length of these arrays when looping on them
 376 //
 377 // all non-null entries are setup explicitly, later in the code
 378 format_descriptor* hdr_dispatch[256] = {
 379     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 380     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 381     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 382     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 383     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 384     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 385     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 386     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 387     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 388     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 389     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 390     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 391     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 392     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 393     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 394     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 395     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 396     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 397     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 398     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 399     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 400     NULL, NULL, NULL, NULL,
 401 };
 402 
 403 // guess_mime tries to auto-detect a MIME-type from the header bytes given,
 404 // using the lookup-tables
 405 const char* guess_mime(unsigned char* buf, size_t len) {
 406     if (len == 0) {
 407         return NULL;
 408     }
 409 
 410     // try the patterns which allow any bytes at the very start
 411     for (size_t i = 0; special_headers[i].header_length > 0; i++) {
 412         if (match_header(buf, len, &special_headers[i])) {
 413             return special_headers[i].mime;
 414         }
 415     }
 416 
 417     format_descriptor* guesses = hdr_dispatch[buf[0]];
 418     if (guesses == NULL) {
 419         return fallback_mime_type;
 420     }
 421 
 422     for (size_t i = 0; guesses[i].header_length > 0; i++) {
 423         if (match_header(buf, len, &guesses[i])) {
 424             return guesses[i].mime;
 425         }
 426     }
 427     return fallback_mime_type;
 428 }
 429 
 430 bool is_mime_disabled(const char* mime) {
 431     return (mime != NULL) && (mime[0] == 'n') && (
 432         strcmp(mime, "no") == 0 ||
 433         strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 ||
 434         strcmp(mime, "none") == 0 || strcmp(mime, "not") == 0
 435     );
 436 }
 437 
 438 // start_data_uri starts the output by declaring the data-URI to be an
 439 // auto-detected MIME-type; the return value is the auto-detection success
 440 bool start_data_uri(FILE* w, unsigned char* buf, size_t len) {
 441     const char* mime = guess_mime(buf, len);
 442     if (is_mime_disabled(mime)) {
 443         return true;
 444     }
 445     if (mime == NULL || mime[0] == 0) {
 446         return false;
 447     }
 448 
 449     EMIT_CONST(w, "data:");
 450     for (size_t i = 0; mime[i] != 0; i++) {
 451         putc(mime[i], w);
 452     }
 453     EMIT_CONST(w, ";base64,");
 454     return true;
 455 }
 456 
 457 const unsigned char base64_lookup[] =
 458     ""
 459     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 460     "";
 461 
 462 inline uint32_t combine_triple(const unsigned char data[4]) {
 463     return (data[0] << 16) | (data[1] << 8) | data[2];
 464 }
 465 
 466 inline void emit_triple(FILE* w, uint32_t v) {
 467     putc(base64_lookup[0x3f & (v >> 18)], w);
 468     putc(base64_lookup[0x3f & (v >> 12)], w);
 469     putc(base64_lookup[0x3f & (v >> 6)], w);
 470     putc(base64_lookup[0x3f & v], w);
 471 }
 472 
 473 inline void emit_couple(FILE* w, uint32_t v) {
 474     putc(base64_lookup[0x3f & (v >> 18)], w);
 475     putc(base64_lookup[0x3f & (v >> 12)], w);
 476     putc(base64_lookup[0x3f & (v >> 6)], w);
 477     putc('=', w);
 478 }
 479 
 480 inline void emit_single(FILE* w, uint32_t v) {
 481     putc(base64_lookup[0x3f & (v >> 18)], w);
 482     putc(base64_lookup[0x3f & (v >> 12)], w);
 483     putc('=', w);
 484     putc('=', w);
 485 }
 486 
 487 bool handle_reader(FILE* w, FILE* src, const char* path) {
 488     unsigned char buf[32 * 1024];
 489     size_t chunks = 0;
 490     size_t where = 0;
 491 
 492     // triple holds groups of 3 bytes at once, which is required by base64,
 493     // except for the last few bytes of input, which are padded with equals;
 494     // the 4th item is never used, but having it aligns things to 32 bits
 495     unsigned char triple[4];
 496 
 497     triple[0] = 0;
 498     triple[1] = 0;
 499     triple[2] = 0;
 500     triple[3] = 0;
 501 
 502     while (!feof(w)) {
 503         const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 504         if (n < 1) {
 505             // assume input is over when no bytes were read
 506             break;
 507         }
 508 
 509         if (chunks == 0 && !start_data_uri(w, buf, n)) {
 510             putc('\n', w);
 511             fflush(w);
 512 
 513             char* fmt = "\x1b[31mcan't auto-detect MIME type for %s\x1b[0m\n";
 514             fprintf(stderr, fmt, path);
 515             return false;
 516         }
 517         chunks++;
 518 
 519         for (size_t i = 0; i < n; i++) {
 520             triple[where] = buf[i];
 521             if (where < 2) {
 522                 where++;
 523             } else {
 524                 emit_triple(w, combine_triple(triple));
 525                 where = 0;
 526             }
 527         }
 528     }
 529 
 530     // empty inputs result in empty outputs
 531     if (chunks == 0) {
 532         return true;
 533     }
 534 
 535     // don't forget unemitted trailing bytes, if any: these need special
 536     // handling, as they include `=` signs; if the input bytes were a
 537     // multiple of 3, there won't be any trailing bytes
 538     switch (where) {
 539         case 1:
 540             triple[1] = 0;
 541             triple[2] = 0;
 542             emit_single(w, combine_triple(triple));
 543             break;
 544         case 2:
 545             triple[2] = 0;
 546             emit_couple(w, combine_triple(triple));
 547             break;
 548     }
 549 
 550     // end with a line-feed, so multiple input streams are each encoded in
 551     // their own line
 552     if (chunks > 0) {
 553         putc('\n', w);
 554         fflush(w);
 555     }
 556     return true;
 557 }
 558 
 559 // handle_file handles data from the filename given; returns false only when
 560 // an error happened
 561 bool handle_file(FILE* w, const char* path) {
 562     // a `-` filename stands for the standard input
 563     if (path[0] == '-' && path[1] == 0) {
 564         return handle_reader(w, stdin, stdin_name);
 565     }
 566 
 567     FILE* f = fopen(path, "rb");
 568     if (f == NULL) {
 569         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 570         return false;
 571     }
 572 
 573     const bool ok = handle_reader(w, f, path);
 574     fclose(f);
 575     return ok;
 576 }
 577 
 578 // is_help_option simplifies control-flow for func run
 579 bool is_help_option(char* s) {
 580     return s[0] == '-' && (
 581         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 582         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 583     );
 584 }
 585 
 586 // is_fallback_option simplifies control-flow for func run
 587 bool is_fallback_option(char* s) {
 588     return s[0] == '-' && (
 589            strcmp(s, "-f") == 0 || strcmp(s, "-fallback") == 0 ||
 590            strcmp(s, "--f") == 0 || strcmp(s, "--fallback") == 0
 591     );
 592 }
 593 
// fallback_aliases is a flat list of pairs, each made of a short name and
// the fallback MIME type it stands for: even-index items are the names,
// each immediately followed by its MIME type, so the item-count must stay
// even for func `resolve_alias` to work; empty MIME types are the ones
// func `start_data_uri` rejects as failed auto-detections
const char* fallback_aliases[] = {
    // "text/json", "application/json",

    // "xbmp", "image/x-bmp",
    // "xflac", "audio/x-flac",
    // "xicon", "image/x-icon",
    // "xm4v", "video/x-m4v",
    // "xsqlite3", "application/x-sqlite3",
    // "xwav", "audio/x-wav",
    // "xwave", "audio/x-wav",
    // "x-bmp", "image/x-bmp",
    // "x-flac", "audio/x-flac",
    // "x-icon", "image/x-icon",
    // "x-m4v", "video/x-m4v",
    // "x-sqlite3", "application/x-sqlite3",
    // "x-wav", "audio/x-wav",

    // single-letter shortcuts
    "b", "application/octet-stream",
    "j", "application/json",
    "t", "text/plain",
    "u", "text/plain; charset=UTF-8",

    // names which turn failed auto-detections into errors
    "e", "",
    "err", "",
    "error", "",
    "f", "",
    "fail", "",

    // alternative names for some of the data formats
    "aac", "audio/aac",
    "aif", "audio/aiff",
    "bin", "application/octet-stream",
    "binary", "application/octet-stream",
    "gzip", "application/gzip",
    "midi", "audio/midi",
    "mpeg", "video/mpeg",
    "octet", "application/octet-stream",
    "octetstream", "application/octet-stream",
    "octet-stream", "application/octet-stream",
    "plain", "text/plain",
    "sqlite", "application/x-sqlite3",
    "svg+xml", "image/svg+xml",
    "tif", "image/tiff",
    "utf8", "text/plain; charset=UTF-8",
    "utf-8", "text/plain; charset=UTF-8",
    "wave", "audio/x-wav",
    "zstd", "application/zstd",

    // the same names/values as the MIME-type macros
    "aiff", "audio/aiff",
    "au", "audio/basic",
    "avi", "video/avi",
    "avif", "image/avif",
    "bmp", "image/x-bmp",
    "caf", "audio/x-caf",
    "cur", "image/vnd.microsoft.icon",
    "css", "text/css",
    "csv", "text/csv",
    "djvu", "image/x-djvu",
    "elf", "application/x-elf",
    "exe", "application/vnd.microsoft.portable-executable",
    "flac", "audio/x-flac",
    "gif", "image/gif",
    "gz", "application/gzip",
    "heic", "image/heic",
    "htm", "text/html",
    "html", "text/html",
    "ico", "image/x-icon",
    "iso", "application/octet-stream",
    "jpg", "image/jpeg",
    "jpeg", "image/jpeg",
    "js", "application/javascript",
    "json", "application/json",
    "m4a", "audio/aac",
    "m4v", "video/x-m4v",
    "mid", "audio/midi",
    "mov", "video/quicktime",
    "mp4", "video/mp4",
    "mp3", "audio/mpeg",
    "mpg", "video/mpeg",
    "ogg", "audio/ogg",
    "opus", "audio/opus",
    "pdf", "application/pdf",
    "png", "image/png",
    "ps", "application/postscript",
    "psd", "image/vnd.adobe.photoshop",
    "rtf", "application/rtf",
    "sqlite3", "application/x-sqlite3",
    "svg", "image/svg+xml",
    "text", "text/plain",
    "tiff", "image/tiff",
    "tsv", "text/tsv",
    "wasm", "application/wasm",
    "wav", "audio/x-wav",
    "webp", "image/webp",
    "webm", "video/webm",
    "xml", "application/xml",
    "zip", "application/zip",
    "zst", "application/zstd",
};
 692 
 693 const char* resolve_alias(char* name) {
 694     const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
 695     for (size_t i = 0; i < n; i += 2) {
 696         if (strcmp(name, fallback_aliases[i]) == 0) {
 697             return fallback_aliases[i + 1];
 698         }
 699     }
 700     return name;
 701 }
 702 
 703 // run returns the number of errors
 704 int run(int argc, char** argv, FILE* w) {
 705     // handle special cmd-line options
 706     for (size_t i = 1; i < argc; i++) {
 707         if (is_help_option(argv[i])) {
 708             // help option is handled right away, also quitting the app
 709             puts(info);
 710             return 0;
 711         }
 712     }
 713 
 714     size_t files = 0;
 715     size_t errors = 0;
 716     bool change_fallback = false;
 717 
 718     // handle all filenames given
 719     for (size_t i = 1; i < argc && !feof(w); i++) {
 720         if (change_fallback) {
 721             fallback_mime_type = resolve_alias(argv[i]);
 722             change_fallback = false;
 723             continue;
 724         }
 725 
 726         if (is_fallback_option(argv[i])) {
 727             change_fallback = true;
 728             continue;
 729         }
 730 
 731         if (!handle_file(w, argv[i])) {
 732             errors++;
 733         }
 734         files++;
 735     }
 736 
 737     if (change_fallback) {
 738         fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n");
 739         errors++;
 740     }
 741 
 742     // no filenames means use stdin as the only input
 743     if (files == 0) {
 744         if (!handle_reader(w, stdin, stdin_name)) {
 745             errors++;
 746         }
 747     }
 748 
 749     return errors;
 750 }
 751 
 752 int main(int argc, char** argv) {
 753 #ifdef _WIN32
 754     setmode(fileno(stdin), O_BINARY);
 755     // ensure output lines end in LF instead of CRLF on windows
 756     setmode(fileno(stdout), O_BINARY);
 757     setmode(fileno(stderr), O_BINARY);
 758 #endif
 759 
 760     // fill entries in the type-detect dispatch table
 761     hdr_dispatch[0] = hdr_dispatch_0; // 0
 762     hdr_dispatch[26] = hdr_dispatch_26; // 26
 763     hdr_dispatch[31] = hdr_dispatch_31; // 31
 764     hdr_dispatch[35] = hdr_dispatch_35; // 35 #
 765     hdr_dispatch[37] = hdr_dispatch_37; // 37 %
 766     hdr_dispatch[40] = hdr_dispatch_40; // 40 (
 767     hdr_dispatch[46] = hdr_dispatch_46; // 46 .
 768     hdr_dispatch[56] = hdr_dispatch_56; // 56 8
 769     hdr_dispatch[60] = hdr_dispatch_60; // 60 <
 770     hdr_dispatch[65] = hdr_dispatch_65; // 65 A
 771     hdr_dispatch[66] = hdr_dispatch_66; // 66 B
 772     hdr_dispatch[70] = hdr_dispatch_70; // 70 F
 773     hdr_dispatch[71] = hdr_dispatch_71; // 71 G
 774     hdr_dispatch[73] = hdr_dispatch_73; // 73 I
 775     hdr_dispatch[77] = hdr_dispatch_77; // 77 M
 776     hdr_dispatch[79] = hdr_dispatch_79; // 79 O
 777     hdr_dispatch[80] = hdr_dispatch_80; // 80 P
 778     hdr_dispatch[82] = hdr_dispatch_82; // 82 R
 779     hdr_dispatch[83] = hdr_dispatch_83; // 83 S
 780     hdr_dispatch[99] = hdr_dispatch_99; // 99 c
 781     hdr_dispatch[102] = hdr_dispatch_102; // 102 f
 782     hdr_dispatch[123] = hdr_dispatch_123; // 123 {
 783     hdr_dispatch[127] = hdr_dispatch_127; // 127
 784     hdr_dispatch[137] = hdr_dispatch_137; // 137
 785     hdr_dispatch[255] = hdr_dispatch_255; // 255
 786 
 787     return run(argc, argv, stdout) == 0 ? 0 : 1;
 788 }