// File: datauri.c
/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O3 -march=native -mtune=native -flto -o ./datauri ./datauri.c
*/

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
// setmode and O_BINARY, used by func main, are declared in these headers
#include <fcntl.h>
#include <io.h>
#endif

// ERROR_STYLE and RESET_STYLE are the ANSI sequences wrapped around error
// messages; they're empty unless RED_ERRORS is defined at build time, and
// each is defined exactly once, to avoid macro-redefinition diagnostics
#ifdef RED_ERRORS
#ifdef __APPLE__
#define ERROR_STYLE "\x1b[31m"
#else
#define ERROR_STYLE "\x1b[38;2;204;0;0m"
#endif
#define RESET_STYLE "\x1b[0m"
#else
#define ERROR_STYLE
#define RESET_STYLE
#endif

// ERROR_LINE turns a string-literal message into a styled line ending in LF
#define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")

// IBUF_SIZE is the size of the input-reading buffer, in bytes
#ifndef IBUF_SIZE
#define IBUF_SIZE (32 * 1024)
#endif

// OBUF_SIZE is the size of the output-writing buffer, in bytes
#ifndef OBUF_SIZE
#define OBUF_SIZE (8 * 1024)
#endif

// info is the help message shown by the help options
const char* info = ""
"datauri [options...] [filenames...]\n"
"\n"
"\n"
"Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
"few bytes from each data/file stream. When given multiple inputs, the output\n"
"will be multiple lines, one for each file given.\n"
"\n"
"Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
"standard-input, which is also used automatically when not given any files.\n"
"\n"
"Data-URIs are base64-encoded text representations of arbitrary data, which\n"
"include their payload's MIME-type, and which are directly useable/shareable\n"
"in web-browsers as links, despite not looking like normal links/URIs.\n"
"\n"
"Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
"\n"
"\n"
"Options, also available with leading double-dashes\n"
"\n"
" -h show this help message\n"
" -help show this help message\n"
"\n"
" -f change the fallback MIME type using the next argument\n"
" -fallback change the fallback MIME type using the next argument\n"
"\n"
" -m change the fallback MIME type using the next argument\n"
" -mime change the fallback MIME type using the next argument\n"
"\n"
" -t change the fallback MIME type using the next argument\n"
" -type change the fallback MIME type using the next argument\n"
"";

// stdin_name is the name used for the standard input in error messages
const char* stdin_name = "<stdin>";

#define default_mime_fallback "application/octet-stream"

// fallback_mime_type is the MIME type used when auto-detection finds no
// match among the header tables
const char* fallback_mime_type = default_mime_fallback;

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;
} bufwriter;

// init_bufwriter sets up the writer given to use the caller-owned buffer b,
// which holds up to cap bytes, and to send its output to the stream given
void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) {
    w->buf = b;
    w->len = 0;
    w->cap = cap;
    w->out = out;
}

// write_byte appends a byte to the buffer; when the buffer is already full,
// its whole content is sent to the output first, and the byte given starts
// the buffer anew
static inline void write_byte(bufwriter* w, unsigned char b) {
    if (w->len < w->cap) {
        w->buf[w->len++] = b;
        return;
    }

    fwrite(w->buf, 1, w->cap, w->out);
    w->buf[0] = b;
    w->len = 1;
}

// write_string appends all bytes of the NUL-terminated string given,
// excluding its terminator
void write_string(bufwriter* w, const char* s) {
    for (; *s != 0; s++) {
        write_byte(w, (unsigned char)*s);
    }
}

// flush sends any pending buffered bytes to the output, empties the buffer,
// and flushes the output stream itself
void flush(bufwriter* w) {
    if (w->len > 0) {
        fwrite(w->buf, 1, w->len, w->out);
    }
    w->len = 0;
    fflush(w->out);
}

// cba stands for `can be anything`, and marks wildcard positions in header
// patterns: this value must differ from all literal bytes in the header
// tables, since func starts_as treats every occurrence of it as a wildcard
//
// the value used to be 0xfd, which is also the last literal byte of the
// zstd magic number (28 B5 2F FD): that turned the last byte of that
// pattern into a wildcard; 0xfe isn't used by any pattern
//
// it's an enum constant, instead of a const variable, so that using it in
// the static tables below is a proper constant expression in standard C
enum { cba = 0xfe }; // 254

#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream"
#define jpeg "image/jpeg"
#define js "application/javascript"
#define json "application/json"
#define m4a "audio/aac"
#define m4v "video/x-m4v"
#define midi "audio/midi"
#define mov "video/quicktime"
#define mp4 "video/mp4"
#define mp3 "audio/mpeg"
#define mpg "video/mpeg"
#define octet "application/octet-stream"
#define ogg "audio/ogg"
#define opus "audio/opus"
#define pdf "application/pdf"
#define png "image/png"
#define ps "application/postscript"
#define psd "image/vnd.adobe.photoshop"
#define rtf "application/rtf"
#define sqlite3 "application/x-sqlite3"
#define svg "image/svg+xml"
#define text "text/plain"
#define tiff "image/tiff"
#define tsv "text/tsv"
#define utf8 "text/plain; charset=UTF-8"
#define wasm "application/wasm"
#define wav "audio/x-wav"
#define webp "image/webp"
#define webm "video/webm"
#define xml "application/xml"
#define zip "application/zip"
#define zst "application/zstd"

// format_descriptor ties a file-header pattern to its data-format type;
// the 15-byte header-limit nicely aligns with the 1-byte length before it
typedef struct format_descriptor {
    // header_length is how many of the header bytes are part of the pattern;
    // a length of 0 marks the end of a table of descriptors
    unsigned char header_length;

    // header_bytes is the pattern itself, where cba values match any byte
    unsigned char header_bytes[15];

    // mime is the MIME type to use when the pattern matches
    const char* mime;
} format_descriptor;

// starts_as tries to match header data to the pattern given: this includes
// allowing `any byte` when the pattern indicates so, using a value reserved
// for that purpose
//
// x and xlen are the data and how many bytes of it are available, while
// y and ylen are the pattern and its length; the result is true only when
// all pattern bytes match the start of the data
bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) {
    // when header data aren't enough for a pattern, there's no match
    if (xlen < ylen) {
        return false;
    }

    for (size_t i = 0; i < ylen; i++) {
        if (y[i] == cba) {
            // `can be anything` value always matches
            continue;
        }

        if (x[i] != y[i]) {
            return false;
        }
    }

    return true;
}

// not confident enough to actually use this, and replace all table entries
#define start_format_descriptor(...) \
    sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \
    { __VA_ARGS__ }

// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
format_descriptor special_headers[] = {
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
    {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
    // the 24-byte m4a `dash` header can't fit the 15-byte limit of these
    // entries, so func guess_mime checks it separately
    // {
    //     24,
    //     {
    //         cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
    //         000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
    //     },
    //     m4a,
    // },
    {0},
};

// each of the following tables groups the patterns starting with the byte
// value in its name, and ends with an entry whose pattern-length is 0

format_descriptor hdr_dispatch_0[] = {
    {4, {000, 000, 001, 0xBA}, mpg},
    {4, {000, 000, 001, 0xB3}, mpg},
    {4, {000, 000, 001, 000}, ico},
    {4, {000, 000, 002, 000}, cur},
    {4, {000, 'a', 's', 'm'}, wasm},
    {0},
};

format_descriptor hdr_dispatch_26[] = {
    {4, {0x1A, 0x45, 0xDF, 0xA3}, webm},
    {0},
};

format_descriptor hdr_dispatch_31[] = {
    // {4, {0x1F, 0x8B, 0x08, 0x08}, gz},
    {3, {0x1F, 0x8B, 0x08}, gz},
    {0},
};

format_descriptor hdr_dispatch_35[] = {
    {3, "#! ", text},
    {3, "#!/", text},
    {0},
};

format_descriptor hdr_dispatch_37[] = {
    {4, "%PDF", pdf},
    {4, "%!PS", ps},
    {0},
};

format_descriptor hdr_dispatch_40[] = {
    // the last byte is a literal 0xFD, which is why cba can't be 0xfd
    {4, {0x28, 0xB5, 0x2F, 0xFD}, zst},
    {0},
};

format_descriptor hdr_dispatch_46[] = {
    {4, ".snd", au},
    {0},
};

format_descriptor hdr_dispatch_56[] = {
    {4, "8BPS", psd},
    {0},
};

format_descriptor hdr_dispatch_60[] = {
    {15, "<!DOCTYPE html>", html},
    {15, "<!DOCTYPE html ", html},
    {5, "<svg>", svg},
    {5, "<svg ", svg},
    {6, "<html>", html},
    {6, "<html ", html},
    {6, "<head>", html},
    {6, "<head ", html},
    {6, "<body>", html},
    {6, "<body ", html},
    {6, "<?xml>", xml},
    {6, "<?xml ", xml},
    {0},
};

format_descriptor hdr_dispatch_65[] = {
    {
        15,
        {
            'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
            cba, cba, cba, cba, 'D', 'J', 'V',
        },
        djvu,
    },
    {0},
};

format_descriptor hdr_dispatch_66[] = {
    {
        15,
        {
            'B', 'M', cba, cba, cba, cba, cba, cba,
            cba, cba, cba, cba, cba, cba, 0x28,
        },
        bmp,
    },
    {0},
};

format_descriptor hdr_dispatch_70[] = {
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
    {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
    {0},
};

format_descriptor hdr_dispatch_71[] = {
    {6, "GIF87a", gif},
    {6, "GIF89a", gif},
    {0},
};

format_descriptor hdr_dispatch_73[] = {
    {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata
    {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata
    {4, {'I', 'I', '*', 000}, tiff},
    {0},
};

format_descriptor hdr_dispatch_77[] = {
    {4, {'M', 'M', 000, '*'}, tiff},
    {4, "MThd", midi},
    {6, {'M', 'Z', cba, 000, cba, 000}, exe},
    // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe},
    // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe},
    // {6, {'M', 'Z', 'P', 000, 002, 000}, exe},
    {0},
};

format_descriptor hdr_dispatch_79[] = {
    {4, "OggS", ogg},
    {0},
};

format_descriptor hdr_dispatch_80[] = {
    {4, {'P', 'K', 003, 004}, zip},
    {0},
};

format_descriptor hdr_dispatch_82[] = {
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
    {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
    {0},
};

// format_descriptor hdr_dispatch_83[] = {
//     // {16, "SQLite format 3\x00", sqlite3},
//     {0},
// };

format_descriptor hdr_dispatch_99[] = {
    {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
    {0},
};

format_descriptor hdr_dispatch_102[] = {
    {4, "fLaC", flac},
    {0},
};

format_descriptor hdr_dispatch_123[] = {
    // the pattern is 5 bytes long: a length of 4 would ignore its last byte
    {5, "{\\rtf", rtf},
    {0},
};

format_descriptor hdr_dispatch_127[] = {
    {4, {127, 'E', 'L', 'F'}, elf},
    {0},
};

format_descriptor hdr_dispatch_137[] = {
    {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
    {0},
};

format_descriptor hdr_dispatch_255[] = {
    {3, {0xFF, 0xD8, 0xFF}, jpeg},
    {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
    {2, {0xFF, 0xFB}, mp3},
    {0},
};
setup explicitly, later in the code 450 format_descriptor* hdr_dispatch[256] = { 451 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 452 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 453 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 454 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 455 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 456 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 457 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 458 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 459 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 460 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 461 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 462 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 463 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 464 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 465 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 466 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 467 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 468 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 469 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 470 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 471 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 472 NULL, NULL, NULL, NULL, 473 }; 474 475 // guess_mime tries to auto-detect a MIME-type from the header bytes given, 476 // using the lookup-tables 477 const char* guess_mime(const unsigned char* buf, size_t len) { 478 if (len == 0) { 479 return NULL; 480 } 481 482 // just in case, start with the patterns which allow 
any first byte 483 for (size_t i = 0; special_headers[i].header_length > 0; i++) { 484 const unsigned char* hb = special_headers[i].header_bytes; 485 const size_t hl = special_headers[i].header_length; 486 if (starts_as(buf, len, hb, hl)) { 487 return special_headers[i].mime; 488 } 489 } 490 491 // the m4a-dash header exceeds the 15-byte limit of the lookup tables 492 const uint8_t header1[24] = { 493 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 494 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 495 }; 496 if (starts_as(buf, len, header1, sizeof(header1))) { 497 return m4a; 498 } 499 500 // the sqlite3 header exceeds the 15-byte limit of the lookup tables 501 const uint8_t header2[16] = "SQLite format 3\x00"; 502 if (starts_as(buf, len, header2, sizeof(header2))) { 503 return sqlite3; 504 } 505 506 format_descriptor* guesses = hdr_dispatch[buf[0]]; 507 if (guesses == NULL) { 508 return fallback_mime_type; 509 } 510 511 for (size_t i = 0; guesses[i].header_length > 0; i++) { 512 const unsigned char* hb = guesses[i].header_bytes; 513 const size_t hl = guesses[i].header_length; 514 if (starts_as(buf, len, hb, hl)) { 515 return guesses[i].mime; 516 } 517 } 518 519 return fallback_mime_type; 520 } 521 522 bool is_mime_disabled(const char* mime) { 523 return (mime != NULL) && (mime[0] == 'n') && ( 524 strcmp(mime, "no") == 0 || 525 strcmp(mime, "nomime") == 0 || 526 strcmp(mime, "no-mime") == 0 || 527 strcmp(mime, "none") == 0 || 528 strcmp(mime, "not") == 0 529 ); 530 } 531 532 // start_data_uri starts the output by declaring the data-URI to be an 533 // auto-detected MIME-type; the return value is the auto-detection success 534 bool start_data_uri(bufwriter* w, const unsigned char* buf, size_t len) { 535 const char* mime = guess_mime(buf, len); 536 if (is_mime_disabled(mime)) { 537 return true; 538 } 539 if (mime == NULL || mime[0] == 0) { 540 return false; 541 } 542 543 write_string(w, "data:"); 544 for (size_t i = 0; mime[i] != 0; i++) { 
545 write_byte(w, mime[i]); 546 } 547 write_string(w, ";base64,"); 548 return true; 549 } 550 551 const unsigned char base64_lookup[64] = 552 "" 553 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 554 ""; 555 556 static inline uint32_t combine_triple(const unsigned char data[3]) { 557 return (data[0] << 16) | (data[1] << 8) | (data[2] << 0); 558 } 559 560 static inline void emit_triple(bufwriter* w, uint32_t v) { 561 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 562 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 563 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 564 write_byte(w, base64_lookup[0x3f & (v >> 0)]); 565 } 566 567 void emit_couple(bufwriter* w, uint32_t v) { 568 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 569 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 570 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 571 write_byte(w, '='); 572 } 573 574 void emit_single(bufwriter* w, uint32_t v) { 575 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 576 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 577 write_byte(w, '='); 578 write_byte(w, '='); 579 } 580 581 bool handle_reader(bufwriter* w, FILE* src, const char* path) { 582 unsigned char buf[IBUF_SIZE]; 583 uint64_t bytes = 0; 584 585 // triple holds groups of 3 bytes at once, which is required by base64, 586 // except for the last few bytes of input, which are padded with equals 587 unsigned char triple[3]; 588 triple[0] = 0; 589 triple[1] = 0; 590 triple[2] = 0; 591 592 while (!feof(w->out)) { 593 const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src); 594 if (n < 1) { 595 // assume input is over when no bytes were read 596 break; 597 } 598 599 if (bytes == 0 && !start_data_uri(w, buf, n)) { 600 write_byte(w, '\n'); 601 flush(w); 602 const char* msg = "can't auto-detect MIME type for"; 603 fprintf(stderr, ERROR_LINE("%s %s"), msg, path); 604 return false; 605 } 606 607 // unrolling loop doesn't seem to outperform compiling this with -O3 608 size_t where = bytes % 3; 609 for 
(size_t i = 0; i < n; i++) { 610 triple[where++] = buf[i]; 611 if (where == 3) { 612 emit_triple(w, combine_triple(triple)); 613 where = 0; 614 } 615 } 616 bytes += n; 617 } 618 619 // empty inputs result in empty outputs 620 if (bytes == 0) { 621 return true; 622 } 623 624 // don't forget unemitted trailing bytes, if any: these need special 625 // handling, as they include `=` signs; if the input bytes were a 626 // multiple of 3, there won't be any trailing bytes 627 switch (bytes % 3) { 628 case 1: 629 triple[1] = 0; 630 triple[2] = 0; 631 emit_single(w, combine_triple(triple)); 632 break; 633 case 2: 634 triple[2] = 0; 635 emit_couple(w, combine_triple(triple)); 636 break; 637 } 638 639 // end with a line-feed, so multiple input streams are each encoded in 640 // their own line 641 if (bytes > 0) { 642 write_byte(w, '\n'); 643 flush(w); 644 } 645 return true; 646 } 647 648 // handle_file handles data from the filename given; returns false only when 649 // an error happened 650 bool handle_file(bufwriter* w, const char* path) { 651 // a `-` filename stands for the standard input 652 if (path[0] == '-' && path[1] == 0) { 653 return handle_reader(w, stdin, stdin_name); 654 } 655 656 FILE* f = fopen(path, "rb"); 657 if (f == NULL) { 658 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 659 return false; 660 } 661 662 const bool ok = handle_reader(w, f, path); 663 fclose(f); 664 return ok; 665 } 666 667 // is_help_option simplifies control-flow for func run 668 bool is_help_option(const char* s) { 669 return s[0] == '-' && ( 670 strcmp(s, "-h") == 0 || 671 strcmp(s, "-help") == 0 || 672 strcmp(s, "--h") == 0 || 673 strcmp(s, "--help") == 0 674 ); 675 } 676 677 // is_fallback_option simplifies control-flow for func run 678 bool is_fallback_option(const char* s) { 679 return s[0] == '-' && ( 680 strcmp(s, "-f") == 0 || 681 strcmp(s, "-fallback") == 0 || 682 strcmp(s, "--f") == 0 || 683 strcmp(s, "--fallback") == 0 || 684 strcmp(s, "-m") == 0 || 685 
strcmp(s, "-mime") == 0 || 686 strcmp(s, "--m") == 0 || 687 strcmp(s, "--mime") == 0 || 688 strcmp(s, "-t") == 0 || 689 strcmp(s, "-type") == 0 || 690 strcmp(s, "--t") == 0 || 691 strcmp(s, "--type") == 0 692 ); 693 } 694 695 const char* fallback_aliases[196] = { 696 // tiny shortcuts 697 "b", octet, 698 "j", json, 699 "t", text, 700 "u", utf8, 701 702 // failure fallbacks 703 "e", "", 704 "err", "", 705 "error", "", 706 "f", "", 707 "fail", "", 708 709 // common mistakes 710 "text/json", json, 711 712 // other 713 "", default_mime_fallback, 714 "default", default_mime_fallback, 715 716 "aif", aiff, 717 "aiff", aiff, 718 "au", au, 719 "avi", avi, 720 "avif", avif, 721 "bmp", bmp, 722 "caf", caf, 723 "cur", cur, 724 "css", css, 725 "csv", csv, 726 "djvu", djvu, 727 "elf", elf, 728 "exe", exe, 729 "flac", flac, 730 "gif", gif, 731 "gz", gz, 732 "heic", heic, 733 "html", html, 734 "ico", ico, 735 "iso", iso, 736 "jpg", jpeg, 737 "jpeg", jpeg, 738 "js", js, 739 "json", json, 740 "m4a", m4a, 741 "m4v", m4v, 742 "midi", midi, 743 "mov", mov, 744 "mp4", mp4, 745 "mp3", mp3, 746 "mpeg", mpg, 747 "ogg", ogg, 748 "opus", opus, 749 "pdf", pdf, 750 "png", png, 751 "ps", ps, 752 "psd", psd, 753 "rtf", rtf, 754 "sqlite3", sqlite3, 755 "svg", svg, 756 "text", text, 757 "tiff", tiff, 758 "tsv", tsv, 759 "wasm", wasm, 760 "wav", wav, 761 "webp", webp, 762 "webm", webm, 763 "xml", xml, 764 "zip", zip, 765 "zst", zst, 766 767 // longer shortcuts 768 "aac", m4a, 769 "aif", aiff, 770 "bin", octet, 771 "binary", octet, 772 "bits", octet, 773 "gzip", gz, 774 "htm", htm, 775 "mid", midi, 776 "mpg", mpg, 777 "octet", octet, 778 "octets", octet, 779 "octetstream", octet, 780 "octet-stream", octet, 781 "plain", text, 782 "sqlite", sqlite3, 783 "svg+xml", svg, 784 "tif", tiff, 785 "utf8", utf8, 786 "utf-8", utf8, 787 "xbmp", bmp, 788 "xcaf", caf, 789 "xflac", flac, 790 "xicon", ico, 791 "xm4v", m4v, 792 "xsqlite3", sqlite3, 793 "xwav", wav, 794 "xwave", wav, 795 "x-bmp", bmp, 796 "x-caf", 
caf, 797 "x-flac", flac, 798 "x-icon", ico, 799 "x-m4v", m4v, 800 "x-sqlite3", sqlite3, 801 "x-wav", wav, 802 "wave", wav, 803 "zstd", zst, 804 }; 805 806 const char* resolve_alias(const char* name) { 807 const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]); 808 for (size_t i = 0; i < n; i += 2) { 809 if (strcmp(name, fallback_aliases[i]) == 0) { 810 return fallback_aliases[i + 1]; 811 } 812 } 813 return name; 814 } 815 816 // run returns the number of errors 817 int run(int argc, char** argv, FILE* w) { 818 unsigned char outbuf[OBUF_SIZE]; 819 bufwriter bw; 820 init_bufwriter(&bw, w, outbuf, sizeof(outbuf)); 821 822 size_t files = 0; 823 size_t errors = 0; 824 bool change_fallback = false; 825 bool options = true; 826 827 // handle all filenames given 828 for (size_t i = 1; i < argc && !feof(w); i++) { 829 if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0) { 830 options = false; 831 continue; 832 } 833 834 if (change_fallback) { 835 fallback_mime_type = resolve_alias(argv[i]); 836 change_fallback = false; 837 continue; 838 } 839 840 if (options && is_fallback_option(argv[i])) { 841 change_fallback = true; 842 continue; 843 } 844 845 if (!handle_file(&bw, argv[i])) { 846 errors++; 847 } 848 files++; 849 } 850 851 if (change_fallback) { 852 flush(&bw); 853 fprintf(stderr, ERROR_LINE("forgot new fallback MIME-type")); 854 errors++; 855 return errors; 856 } 857 858 // no filenames means use stdin as the only input 859 if (files == 0) { 860 if (!handle_reader(&bw, stdin, stdin_name)) { 861 errors++; 862 } 863 } 864 865 flush(&bw); 866 return errors; 867 } 868 869 int main(int argc, char** argv) { 870 #ifdef _WIN32 871 setmode(fileno(stdin), O_BINARY); 872 // ensure output lines end in LF instead of CRLF on windows 873 setmode(fileno(stdout), O_BINARY); 874 setmode(fileno(stderr), O_BINARY); 875 #endif 876 877 if (argc > 1 && is_help_option(argv[1])) { 878 printf("%s", info); 879 return 0; 880 } 881 882 // fill entries in the 
type-detection dispatch table 883 memset(hdr_dispatch, 0, sizeof(hdr_dispatch)); 884 hdr_dispatch[0] = hdr_dispatch_0; // 0 885 hdr_dispatch[26] = hdr_dispatch_26; // 26 886 hdr_dispatch[31] = hdr_dispatch_31; // 31 887 hdr_dispatch[35] = hdr_dispatch_35; // 35 # 888 hdr_dispatch[37] = hdr_dispatch_37; // 37 % 889 hdr_dispatch[40] = hdr_dispatch_40; // 40 ( 890 hdr_dispatch[46] = hdr_dispatch_46; // 46 . 891 hdr_dispatch[56] = hdr_dispatch_56; // 56 8 892 hdr_dispatch[60] = hdr_dispatch_60; // 60 < 893 hdr_dispatch[65] = hdr_dispatch_65; // 65 A 894 hdr_dispatch[66] = hdr_dispatch_66; // 66 B 895 hdr_dispatch[70] = hdr_dispatch_70; // 70 F 896 hdr_dispatch[71] = hdr_dispatch_71; // 71 G 897 hdr_dispatch[73] = hdr_dispatch_73; // 73 I 898 hdr_dispatch[77] = hdr_dispatch_77; // 77 M 899 hdr_dispatch[79] = hdr_dispatch_79; // 79 O 900 hdr_dispatch[80] = hdr_dispatch_80; // 80 P 901 hdr_dispatch[82] = hdr_dispatch_82; // 82 R 902 // hdr_dispatch[83] = hdr_dispatch_83; // 83 S 903 hdr_dispatch[99] = hdr_dispatch_99; // 99 c 904 hdr_dispatch[102] = hdr_dispatch_102; // 102 f 905 hdr_dispatch[123] = hdr_dispatch_123; // 123 { 906 hdr_dispatch[127] = hdr_dispatch_127; // 127 907 hdr_dispatch[137] = hdr_dispatch_137; // 137 908 hdr_dispatch[255] = hdr_dispatch_255; // 255 909 910 return run(argc, argv, stdout) == 0 ? 0 : 1; 911 }