/* File: datauri.c */

/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./datauri ./datauri.c
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, -help, --h, --help show this help message\n"
    " -f, -fallback, --f, --fallback change the fallback MIME type\n"
    "";

// stdin_name is the name used for the standard input in error messages
const char* stdin_name = "<stdin>";

// fallback_mime_type is the MIME type used when auto-detection finds no
// match; it can be changed via the fallback cmd-line option
const char* fallback_mime_type = "application/octet-stream";

// EMIT_CONST abstracts emitting string constants without their final null byte
#define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)

// write_bytes emits the bytes given as a single item; `static` ensures the
// function always has a definition to link against, even when not inlined
static inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
    fwrite(src, len, 1, w);
}

// can be anything: ensure this value differs from all other literal bytes
// in the generic-headers table: failing that, its value could cause subtle
// type-misdetection bugs; the value is chosen to be `obviously` findable
// in the source, which also implies a constant beyond the ascii range, as
// ascii char-constants are also used in the tables
//
// the value used to be 0xfd, which is also the last byte of the zstd header
// in the tables below, turning that byte into an accidental wildcard; being
// an enum constant also makes it a proper constant expression, as required
// by the static initializers of the tables
enum { cba = 0xfe }; // 254

#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpg "image/jpeg"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define mid "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"

// format_descriptor ties a file-header pattern to its data-format type
typedef struct format_descriptor {
    unsigned char header_length;
    unsigned char header_bytes[24];
    const char* mime;
} format_descriptor;

// starts_as tries to match header data to the pattern given: this includes
// allowing `any byte` when the pattern indicates so, using a value reserved
// for that purpose; x/xlen are the data, y/ylen are the pattern
bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
    // when header data aren't enough for a pattern, there's no match
    if (xlen < ylen) {
        return false;
    }

    // only the pattern's own bytes matter: looping over all the data would
    // read past the end of the pattern, and would compare unrelated bytes
    for (size_t i = 0; i < ylen; i++) {
        if (y[i] == cba) {
            // `can be anything` value always matches
            continue;
        }

        if (x[i] != y[i]) {
            return false;
        }
    }

    return true;
}

// wrapper func to make func `starts_as` harder to miscall
static inline bool match_header(
    const unsigned char* d, size_t len, const format_descriptor* to
) {
    return starts_as(d, len, to->header_bytes, to->header_length);
}

// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }

// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    {
        24,
        {
            cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
            000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
        },
        m4a,
    },
    {0},
};

format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

format_descriptor hdr_dispatch_40[] = {
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

format_descriptor hdr_dispatch_60[] = {
    {14, "<!DOCTYPE html", html},
    {4, "<svg", svg},
    {5, "<html", html},
    {5, "<head", html},
    {5, "<body", html},
    {5, "<?xml", xml},
    {0},
};

format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", mid},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

format_descriptor hdr_dispatch_83[] = {
    {16, "SQLite format 3\x00", sqlite3},
    {0},
};

format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};

format_descriptor hdr_dispatch_123[] = {
    // the literal is 5 bytes long, all of which are meant to be matched
    {5, "{\\rtf", rtf},
    {0},
};

format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};

// hdr_dispatch groups format-description-groups by their first byte, thus
// shortening total lookups for some data header
//
// notice how the `ftyp` data formats aren't handled here, since these can
// start with any byte, instead of the literal value of the any-byte markers
// they use
//
// all entries are arrays which must always end with a special entry whose
// pattern-length is declared to be 0, since there's no explicit way to know
// the length of these arrays when looping on them
//
// entries not mentioned below are implicitly NULL; the table is fully setup
// right here, so lookups work even before func main runs
format_descriptor* hdr_dispatch[256] = {
    [0] = hdr_dispatch_0,     // 0
    [26] = hdr_dispatch_26,   // 26
    [31] = hdr_dispatch_31,   // 31
    [35] = hdr_dispatch_35,   // 35 #
    [37] = hdr_dispatch_37,   // 37 %
    [40] = hdr_dispatch_40,   // 40 (
    [46] = hdr_dispatch_46,   // 46 .
    [56] = hdr_dispatch_56,   // 56 8
    [60] = hdr_dispatch_60,   // 60 <
    [65] = hdr_dispatch_65,   // 65 A
    [66] = hdr_dispatch_66,   // 66 B
    [70] = hdr_dispatch_70,   // 70 F
    [71] = hdr_dispatch_71,   // 71 G
    [73] = hdr_dispatch_73,   // 73 I
    [77] = hdr_dispatch_77,   // 77 M
    [79] = hdr_dispatch_79,   // 79 O
    [80] = hdr_dispatch_80,   // 80 P
    [82] = hdr_dispatch_82,   // 82 R
    [83] = hdr_dispatch_83,   // 83 S
    [99] = hdr_dispatch_99,   // 99 c
    [102] = hdr_dispatch_102, // 102 f
    [123] = hdr_dispatch_123, // 123 {
    [127] = hdr_dispatch_127, // 127
    [137] = hdr_dispatch_137, // 137
    [255] = hdr_dispatch_255, // 255
};

// guess_mime tries to auto-detect a MIME-type from the header bytes given,
// using the lookup-tables; the result is NULL for empty data, and is the
// current fallback MIME type when no pattern matches
const char* guess_mime(const unsigned char* buf, size_t len) {
    if (len == 0) {
        return NULL;
    }

    // try the patterns which allow any bytes at the very start
    for (size_t i = 0; special_headers[i].header_length > 0; i++) {
        if (match_header(buf, len, &special_headers[i])) {
            return special_headers[i].mime;
        }
    }

    // all other patterns are grouped by their first byte
    const format_descriptor* guesses = hdr_dispatch[buf[0]];
    if (guesses == NULL) {
        return fallback_mime_type;
    }

    for (size_t i = 0; guesses[i].header_length > 0; i++) {
        if (match_header(buf, len, &guesses[i])) {
            return guesses[i].mime;
        }
    }
    return fallback_mime_type;
}

// is_mime_disabled checks if the name given is one of the special values
// which ask for no MIME-type declaration at all
bool is_mime_disabled(const char* mime) {
    return (mime != NULL) && (mime[0] == 'n') && (
        strcmp(mime, "no") == 0 ||
        strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 ||
        strcmp(mime, "none") == 0 || strcmp(mime, "not") == 0
    );
}

// start_data_uri starts the output by declaring the data-URI to be an
// auto-detected MIME-type; the return value is the auto-detection success
//
// nothing is emitted when the MIME type is one of the `disabled` values,
// which still counts as a success; a missing/empty MIME type is a failure
bool start_data_uri(FILE* w, const unsigned char* buf, size_t len) {
    const char* mime = guess_mime(buf, len);
    if (is_mime_disabled(mime)) {
        return true;
    }
    if (mime == NULL || mime[0] == 0) {
        return false;
    }

    EMIT_CONST(w, "data:");
    fputs(mime, w);
    EMIT_CONST(w, ";base64,");
    return true;
}

// base64_lookup maps each 6-bit value to its base64 symbol
const unsigned char base64_lookup[] =
    ""
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    "";

// combine_triple packs the first 3 bytes given into the low 24 bits of the
// result, the first byte being the most significant one
static inline uint32_t combine_triple(const unsigned char data[4]) {
    return ((uint32_t)data[0] << 16) | ((uint32_t)data[1] << 8) | data[2];
}

// emit_triple emits all 4 base64 symbols for a full group of 3 bytes
static inline void emit_triple(FILE* w, uint32_t v) {
    putc(base64_lookup[0x3f & (v >> 18)], w);
    putc(base64_lookup[0x3f & (v >> 12)], w);
    putc(base64_lookup[0x3f & (v >> 6)], w);
    putc(base64_lookup[0x3f & v], w);
}

// emit_couple emits 3 base64 symbols and 1 padding symbol, which is how to
// encode a trailing group of 2 bytes
static inline void emit_couple(FILE* w, uint32_t v) {
    putc(base64_lookup[0x3f & (v >> 18)], w);
    putc(base64_lookup[0x3f & (v >> 12)], w);
    putc(base64_lookup[0x3f & (v >> 6)], w);
    putc('=', w);
}

// emit_single emits 2 base64 symbols and 2 padding symbols, which is how to
// encode a single trailing byte
static inline void emit_single(FILE* w, uint32_t v) {
    putc(base64_lookup[0x3f & (v >> 18)], w);
    putc(base64_lookup[0x3f & (v >> 12)], w);
    putc('=', w);
    putc('=', w);
}

// handle_reader encodes all bytes from the input given as a single-line
// data-URI; the path is only used for error messages; returns false when
// the MIME type can't be figured out, or when writing the output failed
bool handle_reader(FILE* w, FILE* src, const char* path) {
    unsigned char buf[32 * 1024];
    size_t chunks = 0;
    size_t where = 0;

    // triple holds groups of 3 bytes at once, which is required by base64,
    // except for the last few bytes of input, which are padded with equals;
    // the 4th item is never used, but having it aligns things to 32 bits
    unsigned char triple[4] = {0, 0, 0, 0};

    // checking for end-of-file on the output is meaningless: checking for
    // output errors is what allows quitting early when writes keep failing
    while (!ferror(w)) {
        const size_t n = fread(buf, sizeof(buf[0]), sizeof(buf), src);
        if (n < 1) {
            // assume input is over when no bytes were read
            break;
        }

        if (chunks == 0 && !start_data_uri(w, buf, n)) {
            putc('\n', w);
            fprintf(
                stderr,
                "\x1b[31mcan't auto-detect MIME type for %s\x1b[0m\n",
                path
            );
            return false;
        }
        chunks++;

        for (size_t i = 0; i < n; i++) {
            triple[where] = buf[i];
            if (where < 2) {
                where++;
            } else {
                emit_triple(w, combine_triple(triple));
                where = 0;
            }
        }
    }

    // empty inputs result in empty outputs
    if (chunks == 0) {
        return true;
    }

    // don't forget unemitted trailing bytes, if any: these need special
    // handling, as they include `=` signs; if the input bytes were a
    // multiple of 3, there won't be any trailing bytes
    switch (where) {
    case 1:
        triple[1] = 0;
        triple[2] = 0;
        emit_single(w, combine_triple(triple));
        break;
    case 2:
        triple[2] = 0;
        emit_couple(w, combine_triple(triple));
        break;
    default:
        break;
    }

    // end with a line-feed, so multiple input streams are each encoded in
    // their own line
    putc('\n', w);
    return !ferror(w);
}

// handle_file handles data from the filename given; returns false only when
// an error happened
bool handle_file(FILE* w, const char* path) {
    // a `-` filename stands for the standard input
    if (path[0] == '-' && path[1] == 0) {
        return handle_reader(w, stdin, stdin_name);
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    const bool ok = handle_reader(w, f, path);
    fclose(f);
    return ok;
}

// is_help_option simplifies control-flow for func run
bool is_help_option(const char* s) {
    return s[0] == '-' && (
        strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
    );
}

// is_fallback_option simplifies control-flow for func run
bool is_fallback_option(const char* s) {
    return s[0] == '-' && (
        strcmp(s, "-f") == 0 || strcmp(s, "-fallback") == 0 ||
        strcmp(s, "--f") == 0 || strcmp(s, "--fallback") == 0
    );
}

// fallback_aliases is a flat list of pairs: each alias is followed by the
// MIME type it stands for; empty MIME types make auto-detection failures
// into errors
const char* fallback_aliases[] = {
    // "text/json", "application/json",

    // "xbmp", "image/x-bmp",
    // "xflac", "audio/x-flac",
    // "xicon", "image/x-icon",
    // "xm4v", "video/x-m4v",
    // "xsqlite3", "application/x-sqlite3",
    // "xwav", "audio/x-wav",
    // "xwave", "audio/x-wav",
    // "x-bmp", "image/x-bmp",
    // "x-flac", "audio/x-flac",
    // "x-icon", "image/x-icon",
    // "x-m4v", "video/x-m4v",
    // "x-sqlite3", "application/x-sqlite3",
    // "x-wav", "audio/x-wav",

    "b", "application/octet-stream",
    "j", "application/json",
    "t", "text/plain",
    "u", "text/plain; charset=UTF-8",

    "e", "",
    "err", "",
    "error", "",
    "f", "",
    "fail", "",

    "aac", "audio/aac",
    "aif", "audio/aiff",
    "bin", "application/octet-stream",
    "binary", "application/octet-stream",
    "gzip", "application/gzip",
    "midi", "audio/midi",
    "mpeg", "video/mpeg",
    "octet", "application/octet-stream",
    "octetstream", "application/octet-stream",
    "octet-stream", "application/octet-stream",
    "plain", "text/plain",
    "sqlite", "application/x-sqlite3",
    "svg+xml", "image/svg+xml",
    "tif", "image/tiff",
    "utf8", "text/plain; charset=UTF-8",
    "utf-8", "text/plain; charset=UTF-8",
    "wave", "audio/x-wav",
    "zstd", "application/zstd",

    "aiff", "audio/aiff",
    "au", "audio/basic",
    "avi", "video/avi",
    "avif", "image/avif",
    "bmp", "image/x-bmp",
    "caf", "audio/x-caf",
    "cur", "image/vnd.microsoft.icon",
    "css", "text/css",
    "csv", "text/csv",
    "djvu", "image/x-djvu",
    "elf", "application/x-elf",
    "exe", "application/vnd.microsoft.portable-executable",
    "flac", "audio/x-flac",
    "gif", "image/gif",
    "gz", "application/gzip",
    "heic", "image/heic",
    "htm", "text/html",
    "html", "text/html",
    "ico", "image/x-icon",
    "iso", "application/octet-stream",
    "jpg", "image/jpeg",
    "jpeg", "image/jpeg",
    "js", "application/javascript",
    "json", "application/json",
    "m4a", "audio/aac",
    "m4v", "video/x-m4v",
    "mid", "audio/midi",
    "mov", "video/quicktime",
    "mp4", "video/mp4",
    "mp3", "audio/mpeg",
    "mpg", "video/mpeg",
    "ogg", "audio/ogg",
    "opus", "audio/opus",
    "pdf", "application/pdf",
    "png", "image/png",
    "ps", "application/postscript",
    "psd", "image/vnd.adobe.photoshop",
    "rtf", "application/rtf",
    "sqlite3", "application/x-sqlite3",
    "svg", "image/svg+xml",
    "text", "text/plain",
    "tiff", "image/tiff",
    "tsv", "text/tsv",
    "wasm", "application/wasm",
    "wav", "audio/x-wav",
    "webp", "image/webp",
    "webm", "video/webm",
    "xml", "application/xml",
    "zip", "application/zip",
    "zst", "application/zstd",
};

// resolve_alias turns a fallback alias into the MIME type it stands for;
// names which aren't aliases are returned as given
const char* resolve_alias(const char* name) {
    const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]);
    // entries come in pairs, so only look at the even-indexed ones, while
    // ensuring each alias checked does have a value right after it
    for (size_t i = 0; i + 1 < n; i += 2) {
        if (strcmp(name, fallback_aliases[i]) == 0) {
            return fallback_aliases[i + 1];
        }
    }
    return name;
}

// run returns the number of errors
701 int run(int argc, char** argv, FILE* w) { 702 // handle special cmd-line options 703 for (size_t i = 1; i < argc; i++) { 704 if (is_help_option(argv[i])) { 705 // help option is handled right away, also quitting the app 706 puts(info); 707 return 0; 708 } 709 } 710 711 size_t files = 0; 712 size_t errors = 0; 713 bool change_fallback = false; 714 715 // handle all filenames given 716 for (size_t i = 1; i < argc && !feof(w); i++) { 717 if (change_fallback) { 718 fallback_mime_type = resolve_alias(argv[i]); 719 change_fallback = false; 720 continue; 721 } 722 723 if (is_fallback_option(argv[i])) { 724 change_fallback = true; 725 continue; 726 } 727 728 if (!handle_file(w, argv[i])) { 729 errors++; 730 } 731 files++; 732 } 733 734 if (change_fallback) { 735 fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n"); 736 errors++; 737 } 738 739 // no filenames means use stdin as the only input 740 if (files == 0) { 741 if (!handle_reader(w, stdin, stdin_name)) { 742 errors++; 743 } 744 } 745 746 return errors; 747 } 748 749 int main(int argc, char** argv) { 750 #ifdef _WIN32 751 setmode(fileno(stdin), O_BINARY); 752 // ensure output lines end in LF instead of CRLF on windows 753 setmode(fileno(stdout), O_BINARY); 754 setmode(fileno(stderr), O_BINARY); 755 #endif 756 757 // fill entries in the type-detect dispatch table 758 hdr_dispatch[0] = hdr_dispatch_0; // 0 759 hdr_dispatch[26] = hdr_dispatch_26; // 26 760 hdr_dispatch[31] = hdr_dispatch_31; // 31 761 hdr_dispatch[35] = hdr_dispatch_35; // 35 # 762 hdr_dispatch[37] = hdr_dispatch_37; // 37 % 763 hdr_dispatch[40] = hdr_dispatch_40; // 40 ( 764 hdr_dispatch[46] = hdr_dispatch_46; // 46 . 
765 hdr_dispatch[56] = hdr_dispatch_56; // 56 8 766 hdr_dispatch[60] = hdr_dispatch_60; // 60 < 767 hdr_dispatch[65] = hdr_dispatch_65; // 65 A 768 hdr_dispatch[66] = hdr_dispatch_66; // 66 B 769 hdr_dispatch[70] = hdr_dispatch_70; // 70 F 770 hdr_dispatch[71] = hdr_dispatch_71; // 71 G 771 hdr_dispatch[73] = hdr_dispatch_73; // 73 I 772 hdr_dispatch[77] = hdr_dispatch_77; // 77 M 773 hdr_dispatch[79] = hdr_dispatch_79; // 79 O 774 hdr_dispatch[80] = hdr_dispatch_80; // 80 P 775 hdr_dispatch[82] = hdr_dispatch_82; // 82 R 776 hdr_dispatch[83] = hdr_dispatch_83; // 83 S 777 hdr_dispatch[99] = hdr_dispatch_99; // 99 c 778 hdr_dispatch[102] = hdr_dispatch_102; // 102 f 779 hdr_dispatch[123] = hdr_dispatch_123; // 123 { 780 hdr_dispatch[127] = hdr_dispatch_127; // 127 781 hdr_dispatch[137] = hdr_dispatch_137; // 137 782 hdr_dispatch[255] = hdr_dispatch_255; // 255 783 784 return run(argc, argv, stdout) == 0 ? 0 : 1; 785 }