// File: datauri.c

/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./datauri ./datauri.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
// setmode/fileno and O_BINARY, used by func main, are declared by these two
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, -help, --h, --help show this help message\n"
    " -f, -fallback, --f, --fallback change the fallback MIME type\n"
    "";

// stdin_name is the name used in error messages about the standard input
const char* stdin_name = "<stdin>";

#define octet "application/octet-stream"

// fallback_mime_type is the MIME type used when auto-detection finds no match
const char* fallback_mime_type = octet;

// EMIT_CONST abstracts emitting string constants without their final null byte
#define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)

// write_bytes emits all the bytes given to the output given
void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    fwrite(src, len, 1, w);
}

// cba (`can be anything`) is the wildcard marker for the header patterns in
// the lookup tables: its value must differ from every literal byte used in
// those tables, or that literal would silently turn into a wildcard; 0xfd
// can't be used, since it's the last byte of the zstd magic-number pattern;
// the value is also beyond the ascii range, as ascii char-constants are used
// in the tables
//
// it's an enum constant, instead of a const-qualified variable, so that it's
// a proper constant expression when used in the static-table initializers
enum { cba = 0xfe }; // 254

#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
103 #define ico "image/x-icon" 104 #define iso "application/octet-stream" 105 #define jpeg "image/jpeg" 106 #define js "application/javascript" 107 #define json "application/json" 108 #define m4a "audio/aac" 109 #define m4v "video/x-m4v" 110 #define midi "audio/midi" 111 #define mov "video/quicktime" 112 #define mp4 "video/mp4" 113 #define mp3 "audio/mpeg" 114 #define mpg "video/mpeg" 115 #define ogg "audio/ogg" 116 #define opus "audio/opus" 117 #define pdf "application/pdf" 118 #define png "image/png" 119 #define ps "application/postscript" 120 #define psd "image/vnd.adobe.photoshop" 121 #define rtf "application/rtf" 122 #define sqlite3 "application/x-sqlite3" 123 #define svg "image/svg+xml" 124 #define text "text/plain" 125 #define tiff "image/tiff" 126 #define tsv "text/tsv" 127 #define utf8 "text/plain; charset=UTF-8" 128 #define wasm "application/wasm" 129 #define wav "audio/x-wav" 130 #define webp "image/webp" 131 #define webm "video/webm" 132 #define xml "application/xml" 133 #define zip "application/zip" 134 #define zst "application/zstd" 135 136 // format_descriptor ties a file-header pattern to its data-format type; 137 // the 15-byte header-limit nicely aligns with the 1-byte length before it 138 typedef struct format_descriptor { 139 unsigned char header_length; 140 unsigned char header_bytes[15]; 141 const char* mime; 142 } format_descriptor; 143 144 // starts_as tries to match header data to the pattern given: this includes 145 // allowing `any byte` when the pattern indicates so, using a value reserved 146 // for that purpose 147 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) { 148 // when header data aren't enough for a pattern, there's no match 149 if (xlen < ylen) { 150 return false; 151 } 152 153 for (size_t i = 0; i < ylen; i++) { 154 if (y[i] == cba) { 155 // `can be anything` value always matches 156 continue; 157 } 158 159 if (x[i] != y[i]) { 160 return false; 161 } 162 } 163 164 return true; 165 } 166 167 // not 
confident enough to actually use this, and replace all table entries 168 #define start_format_descriptor(...) \ 169 sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \ 170 { __VA_ARGS__ } 171 172 // format markers with leading wildcards, which should be checked before the 173 // normal ones: this is to prevent mismatches with the latter types, even 174 // though you can make probabilistic arguments which suggest these mismatches 175 // should be very unlikely in practice 176 format_descriptor special_headers[] = { 177 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 178 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 179 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 180 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 181 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 182 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 183 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 184 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 185 // { 186 // 24, 187 // { 188 // cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 189 // 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 190 // }, 191 // m4a, 192 // }, 193 {0}, 194 }; 195 196 // check_m4a_dash handles the only special-header which exceeds 15 bytes 197 bool check_m4a_dash(const uint8_t* x, size_t xlen) { 198 const unsigned char header[24] = { 199 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 200 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 201 }; 202 return starts_as(x, xlen, header, sizeof(header)); 203 } 204 205 format_descriptor hdr_dispatch_0[] = { 206 {4, {000, 000, 001, 0xBA}, mpg}, 207 {4, {000, 000, 001, 0xB3}, mpg}, 208 {4, {000, 000, 001, 000}, ico}, 209 {4, {000, 000, 002, 000}, cur}, 210 {4, {000, 'a', 's', 'm'}, wasm}, 211 {0}, 212 }; 213 214 
format_descriptor hdr_dispatch_26[] = { 215 {4, {0x1A, 0x45, 0xDF, 0xA3}, webm}, 216 {0}, 217 }; 218 219 format_descriptor hdr_dispatch_31[] = { 220 // {4, {0x1F, 0x8B, 0x08, 0x08}, gz}, 221 {3, {0x1F, 0x8B, 0x08}, gz}, 222 {0}, 223 }; 224 225 format_descriptor hdr_dispatch_35[] = { 226 {3, "#! ", text}, 227 {3, "#!/", text}, 228 {0}, 229 }; 230 231 format_descriptor hdr_dispatch_37[] = { 232 {4, "%PDF", pdf}, 233 {4, "%!PS", ps}, 234 {0}, 235 }; 236 237 format_descriptor hdr_dispatch_40[] = { 238 {4, {0x28, 0xB5, 0x2F, 0xFD}, zst}, 239 {0}, 240 }; 241 242 format_descriptor hdr_dispatch_46[] = { 243 {4, ".snd", au}, 244 {0}, 245 }; 246 247 format_descriptor hdr_dispatch_56[] = { 248 {4, "8BPS", psd}, 249 {0}, 250 }; 251 252 format_descriptor hdr_dispatch_60[] = { 253 {14, "<!DOCTYPE html", html}, 254 {4, "<svg", svg}, 255 {5, "<html", html}, 256 {5, "<head", html}, 257 {5, "<body", html}, 258 {5, "<?xml", xml}, 259 {0}, 260 }; 261 262 format_descriptor hdr_dispatch_65[] = { 263 { 264 15, 265 { 266 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', 267 cba, cba, cba, cba, 'D', 'J', 'V', 268 }, 269 djvu, 270 }, 271 {0}, 272 }; 273 274 format_descriptor hdr_dispatch_66[] = { 275 { 276 15, 277 { 278 'B', 'M', cba, cba, cba, cba, cba, cba, 279 cba, cba, cba, cba, cba, cba, 0x28, 280 }, 281 bmp, 282 }, 283 {0}, 284 }; 285 286 format_descriptor hdr_dispatch_70[] = { 287 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 288 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 289 {0}, 290 }; 291 292 format_descriptor hdr_dispatch_71[] = { 293 {6, "GIF87a", gif}, 294 {6, "GIF89a", gif}, 295 {0}, 296 }; 297 298 format_descriptor hdr_dispatch_73[] = { 299 {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata 300 {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata 301 {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata 302 {4, {'I', 'I', '*', 000}, tiff}, 303 {0}, 304 }; 305 306 format_descriptor hdr_dispatch_77[] = { 307 {4, {'M', 'M', 000, 
'*'}, tiff}, 308 {4, "MThd", midi}, 309 {6, {'M', 'Z', cba, 000, cba, 000}, exe}, 310 // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe}, 311 // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe}, 312 // {6, {'M', 'Z', 'P', 000, 002, 000}, exe}, 313 {0}, 314 }; 315 316 format_descriptor hdr_dispatch_79[] = { 317 {4, "OggS", ogg}, 318 {0}, 319 }; 320 321 format_descriptor hdr_dispatch_80[] = { 322 {4, {'P', 'K', 003, 004}, zip}, 323 {0}, 324 }; 325 326 format_descriptor hdr_dispatch_82[] = { 327 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 328 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 329 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 330 {0}, 331 }; 332 333 format_descriptor hdr_dispatch_83[] = { 334 // {16, "SQLite format 3\x00", sqlite3}, 335 {0}, 336 }; 337 338 format_descriptor hdr_dispatch_99[] = { 339 {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 340 {0}, 341 }; 342 343 format_descriptor hdr_dispatch_102[] = { 344 {4, "fLaC", flac}, 345 {0}, 346 }; 347 348 format_descriptor hdr_dispatch_123[] = { 349 {4, "{\\rtf", rtf}, 350 {0}, 351 }; 352 353 format_descriptor hdr_dispatch_127[] = { 354 {4, {127, 'E', 'L', 'F'}, elf}, 355 {0}, 356 }; 357 358 format_descriptor hdr_dispatch_137[] = { 359 {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 360 {0}, 361 }; 362 363 format_descriptor hdr_dispatch_255[] = { 364 {3, {0xFF, 0xD8, 0xFF}, jpeg}, 365 {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 366 {2, {0xFF, 0xFB}, mp3}, 367 {0}, 368 }; 369 370 // hdr_dispatch groups format-description-groups by their first byte, thus 371 // shortening total lookups for some data header 372 // 373 // notice how the `ftyp` data formats aren't handled here, since these can 374 // start with any byte, instead of the literal value of the any-byte markers 375 // they use 376 // 377 // all entries are arrays which must always end with a special entry whose 378 // pattern-length is declared to be 0, since there's 
no explicit way to know 379 // the length of these arrays when looping on them 380 // 381 // all non-null entries are setup explicitly, later in the code 382 format_descriptor* hdr_dispatch[256] = { 383 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 384 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 385 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 386 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 387 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 388 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 389 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 390 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 391 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 392 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 393 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 394 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 395 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 396 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 397 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 398 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 399 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 400 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 401 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 402 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 403 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 404 NULL, NULL, NULL, NULL, 405 }; 406 407 // guess_mime tries to auto-detect a MIME-type from the header bytes given, 408 // using the lookup-tables 409 const char* guess_mime(const unsigned char* buf, 
size_t len) { 410 if (len == 0) { 411 return NULL; 412 } 413 414 // try the patterns which allow any bytes at the very start 415 for (size_t i = 0; special_headers[i].header_length > 0; i++) { 416 const unsigned char* hb = special_headers[i].header_bytes; 417 const size_t hl = special_headers[i].header_length; 418 if (starts_as(buf, len, hb, hl)) { 419 return special_headers[i].mime; 420 } 421 } 422 423 // the m4a-dash header exceeds the 15-byte limit of the lookup tables 424 if (check_m4a_dash(buf, len)) { 425 return m4a; 426 } 427 428 format_descriptor* guesses = hdr_dispatch[buf[0]]; 429 if (guesses == NULL) { 430 return fallback_mime_type; 431 } 432 433 for (size_t i = 0; guesses[i].header_length > 0; i++) { 434 const unsigned char* hb = guesses[i].header_bytes; 435 const size_t hl = guesses[i].header_length; 436 if (starts_as(buf, len, hb, hl)) { 437 return guesses[i].mime; 438 } 439 } 440 441 // the sqlite3 header exceeds the 15-byte limit of the lookup tables 442 const char header[16] = "SQLite format 3\x00"; 443 if (starts_as(buf, len, (uint8_t*)header, sizeof(header))) { 444 return sqlite3; 445 } 446 447 return fallback_mime_type; 448 } 449 450 bool is_mime_disabled(const char* mime) { 451 return (mime != NULL) && (mime[0] == 'n') && ( 452 strcmp(mime, "no") == 0 || 453 strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 || 454 strcmp(mime, "none") == 0 || strcmp(mime, "not") == 0 455 ); 456 } 457 458 // start_data_uri starts the output by declaring the data-URI to be an 459 // auto-detected MIME-type; the return value is the auto-detection success 460 bool start_data_uri(FILE* w, const unsigned char* buf, size_t len) { 461 const char* mime = guess_mime(buf, len); 462 if (is_mime_disabled(mime)) { 463 return true; 464 } 465 if (mime == NULL || mime[0] == 0) { 466 return false; 467 } 468 469 EMIT_CONST(w, "data:"); 470 for (size_t i = 0; mime[i] != 0; i++) { 471 putc(mime[i], w); 472 } 473 EMIT_CONST(w, ";base64,"); 474 return true; 475 } 476 477 
const unsigned char base64_lookup[64] = 478 "" 479 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 480 ""; 481 482 uint32_t combine_triple(const unsigned char data[4]) { 483 return (data[0] << 16) | (data[1] << 8) | (data[2] << 0); 484 } 485 486 void emit_triple(FILE* w, uint32_t v) { 487 putc(base64_lookup[0x3f & (v >> 18)], w); 488 putc(base64_lookup[0x3f & (v >> 12)], w); 489 putc(base64_lookup[0x3f & (v >> 6)], w); 490 putc(base64_lookup[0x3f & (v >> 0)], w); 491 } 492 493 void emit_couple(FILE* w, uint32_t v) { 494 putc(base64_lookup[0x3f & (v >> 18)], w); 495 putc(base64_lookup[0x3f & (v >> 12)], w); 496 putc(base64_lookup[0x3f & (v >> 6)], w); 497 putc('=', w); 498 } 499 500 void emit_single(FILE* w, uint32_t v) { 501 putc(base64_lookup[0x3f & (v >> 18)], w); 502 putc(base64_lookup[0x3f & (v >> 12)], w); 503 putc('=', w); 504 putc('=', w); 505 } 506 507 bool handle_reader(FILE* w, FILE* src, const char* path) { 508 unsigned char buf[32 * 1024]; 509 uint64_t bytes = 0; 510 511 // triple holds groups of 3 bytes at once, which is required by base64, 512 // except for the last few bytes of input, which are padded with equals; 513 // the 4th item is never used, but having it aligns things to 32 bits 514 unsigned char triple[4]; 515 516 triple[0] = 0; 517 triple[1] = 0; 518 triple[2] = 0; 519 triple[3] = 0; 520 521 while (!feof(w)) { 522 const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src); 523 if (n < 1) { 524 // assume input is over when no bytes were read 525 break; 526 } 527 528 if (bytes == 0 && !start_data_uri(w, buf, n)) { 529 putc('\n', w); 530 const char* msg = "can't auto-detect MIME type for"; 531 fprintf(stderr, "\x1b[31m%s %s\x1b[0m\n", msg, path); 532 return false; 533 } 534 535 size_t where = bytes % 3; 536 for (size_t i = 0; i < n; i++, bytes++) { 537 triple[where++] = buf[i]; 538 if (where == 3) { 539 emit_triple(w, combine_triple(triple)); 540 where = 0; 541 } 542 } 543 } 544 545 // empty inputs result in empty 
outputs 546 if (bytes == 0) { 547 return true; 548 } 549 550 // don't forget unemitted trailing bytes, if any: these need special 551 // handling, as they include `=` signs; if the input bytes were a 552 // multiple of 3, there won't be any trailing bytes 553 switch (bytes % 3) { 554 case 1: 555 triple[1] = 0; 556 triple[2] = 0; 557 emit_single(w, combine_triple(triple)); 558 break; 559 case 2: 560 triple[2] = 0; 561 emit_couple(w, combine_triple(triple)); 562 break; 563 } 564 565 // end with a line-feed, so multiple input streams are each encoded in 566 // their own line 567 if (bytes > 0) { 568 putc('\n', w); 569 } 570 return true; 571 } 572 573 // handle_file handles data from the filename given; returns false only when 574 // an error happened 575 bool handle_file(FILE* w, const char* path) { 576 // a `-` filename stands for the standard input 577 if (path[0] == '-' && path[1] == 0) { 578 return handle_reader(w, stdin, stdin_name); 579 } 580 581 FILE* f = fopen(path, "rb"); 582 if (f == NULL) { 583 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 584 return false; 585 } 586 587 const bool ok = handle_reader(w, f, path); 588 fclose(f); 589 return ok; 590 } 591 592 // is_help_option simplifies control-flow for func run 593 bool is_help_option(const char* s) { 594 return s[0] == '-' && ( 595 strcmp(s, "-h") == 0 || 596 strcmp(s, "-help") == 0 || 597 strcmp(s, "--h") == 0 || 598 strcmp(s, "--help") == 0 599 ); 600 } 601 602 // is_fallback_option simplifies control-flow for func run 603 bool is_fallback_option(const char* s) { 604 return s[0] == '-' && ( 605 strcmp(s, "-f") == 0 || 606 strcmp(s, "-fallback") == 0 || 607 strcmp(s, "--f") == 0 || 608 strcmp(s, "--fallback") == 0 609 ); 610 } 611 612 const char* fallback_aliases[192] = { 613 // tiny shortcuts 614 "b", octet, 615 "j", json, 616 "t", text, 617 "u", utf8, 618 619 // failure fallbacks 620 "e", "", 621 "err", "", 622 "error", "", 623 "f", "", 624 "fail", "", 625 626 // common mistakes 627 
"text/json", json, 628 629 "aif", aiff, 630 "aiff", aiff, 631 "au", au, 632 "avi", avi, 633 "avif", avif, 634 "bmp", bmp, 635 "caf", caf, 636 "cur", cur, 637 "css", css, 638 "csv", csv, 639 "djvu", djvu, 640 "elf", elf, 641 "exe", exe, 642 "flac", flac, 643 "gif", gif, 644 "gz", gz, 645 "heic", heic, 646 "html", html, 647 "ico", ico, 648 "iso", iso, 649 "jpg", jpeg, 650 "jpeg", jpeg, 651 "js", js, 652 "json", json, 653 "m4a", m4a, 654 "m4v", m4v, 655 "midi", midi, 656 "mov", mov, 657 "mp4", mp4, 658 "mp3", mp3, 659 "mpeg", mpg, 660 "ogg", ogg, 661 "opus", opus, 662 "pdf", pdf, 663 "png", png, 664 "ps", ps, 665 "psd", psd, 666 "rtf", rtf, 667 "sqlite3", sqlite3, 668 "svg", svg, 669 "text", text, 670 "tiff", tiff, 671 "tsv", tsv, 672 "wasm", wasm, 673 "wav", wav, 674 "webp", webp, 675 "webm", webm, 676 "xml", xml, 677 "zip", zip, 678 "zst", zst, 679 680 // longer shortcuts 681 "aac", m4a, 682 "aif", aiff, 683 "bin", octet, 684 "binary", octet, 685 "bits", octet, 686 "gzip", gz, 687 "htm", htm, 688 "mid", midi, 689 "mpg", mpg, 690 "octet", octet, 691 "octets", octet, 692 "octetstream", octet, 693 "octet-stream", octet, 694 "plain", text, 695 "sqlite", sqlite3, 696 "svg+xml", svg, 697 "tif", tiff, 698 "utf8", utf8, 699 "utf-8", utf8, 700 "xbmp", bmp, 701 "xcaf", caf, 702 "xflac", flac, 703 "xicon", ico, 704 "xm4v", m4v, 705 "xsqlite3", sqlite3, 706 "xwav", wav, 707 "xwave", wav, 708 "x-bmp", bmp, 709 "x-caf", caf, 710 "x-flac", flac, 711 "x-icon", ico, 712 "x-m4v", m4v, 713 "x-sqlite3", sqlite3, 714 "x-wav", wav, 715 "wave", wav, 716 "zstd", zst, 717 }; 718 719 const char* resolve_alias(const char* name) { 720 const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]); 721 for (size_t i = 0; i < n; i += 2) { 722 if (strcmp(name, fallback_aliases[i]) == 0) { 723 return fallback_aliases[i + 1]; 724 } 725 } 726 return name; 727 } 728 729 // run returns the number of errors 730 int run(int argc, char** argv, FILE* w) { 731 size_t files = 0; 732 size_t errors = 
0; 733 bool change_fallback = false; 734 735 // handle all filenames given 736 for (size_t i = 1; i < argc && !feof(w); i++) { 737 if (change_fallback) { 738 fallback_mime_type = resolve_alias(argv[i]); 739 change_fallback = false; 740 continue; 741 } 742 743 if (is_fallback_option(argv[i])) { 744 change_fallback = true; 745 continue; 746 } 747 748 if (!handle_file(w, argv[i])) { 749 errors++; 750 } 751 files++; 752 } 753 754 if (change_fallback) { 755 fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n"); 756 errors++; 757 return errors; 758 } 759 760 // no filenames means use stdin as the only input 761 if (files == 0) { 762 if (!handle_reader(w, stdin, stdin_name)) { 763 errors++; 764 } 765 } 766 767 return errors; 768 } 769 770 int main(int argc, char** argv) { 771 #ifdef _WIN32 772 setmode(fileno(stdin), O_BINARY); 773 // ensure output lines end in LF instead of CRLF on windows 774 setmode(fileno(stdout), O_BINARY); 775 setmode(fileno(stderr), O_BINARY); 776 #endif 777 778 if (argc > 1 && is_help_option(argv[1])) { 779 puts(info); 780 return 0; 781 } 782 783 // fill entries in the type-detection dispatch table 784 hdr_dispatch[0] = hdr_dispatch_0; // 0 785 hdr_dispatch[26] = hdr_dispatch_26; // 26 786 hdr_dispatch[31] = hdr_dispatch_31; // 31 787 hdr_dispatch[35] = hdr_dispatch_35; // 35 # 788 hdr_dispatch[37] = hdr_dispatch_37; // 37 % 789 hdr_dispatch[40] = hdr_dispatch_40; // 40 ( 790 hdr_dispatch[46] = hdr_dispatch_46; // 46 . 
791 hdr_dispatch[56] = hdr_dispatch_56; // 56 8 792 hdr_dispatch[60] = hdr_dispatch_60; // 60 < 793 hdr_dispatch[65] = hdr_dispatch_65; // 65 A 794 hdr_dispatch[66] = hdr_dispatch_66; // 66 B 795 hdr_dispatch[70] = hdr_dispatch_70; // 70 F 796 hdr_dispatch[71] = hdr_dispatch_71; // 71 G 797 hdr_dispatch[73] = hdr_dispatch_73; // 73 I 798 hdr_dispatch[77] = hdr_dispatch_77; // 77 M 799 hdr_dispatch[79] = hdr_dispatch_79; // 79 O 800 hdr_dispatch[80] = hdr_dispatch_80; // 80 P 801 hdr_dispatch[82] = hdr_dispatch_82; // 82 R 802 hdr_dispatch[83] = hdr_dispatch_83; // 83 S 803 hdr_dispatch[99] = hdr_dispatch_99; // 99 c 804 hdr_dispatch[102] = hdr_dispatch_102; // 102 f 805 hdr_dispatch[123] = hdr_dispatch_123; // 123 { 806 hdr_dispatch[127] = hdr_dispatch_127; // 127 807 hdr_dispatch[137] = hdr_dispatch_137; // 137 808 hdr_dispatch[255] = hdr_dispatch_255; // 255 809 810 return run(argc, argv, stdout) == 0 ? 0 : 1; 811 }