File: datauri.c 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 You can build this command-line app by running 27 28 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./datauri ./datauri.c 29 */ 30 31 #include <stdbool.h> 32 #include <stdint.h> 33 #include <stdio.h> 34 #include <string.h> 35 36 #ifdef _WIN32 37 #include <windows.h> 38 #endif 39 40 #ifdef RED_ERRORS 41 #define ERROR_STYLE "\x1b[38;2;204;0;0m" 42 #ifdef __APPLE__ 43 #define ERROR_STYLE "\x1b[31m" 44 #endif 45 #define RESET_STYLE "\x1b[0m" 46 #else 47 #define ERROR_STYLE 48 #define RESET_STYLE 49 #endif 50 51 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n") 52 53 #ifndef IBUF_SIZE 54 #define IBUF_SIZE (32 * 1024) 55 #endif 56 57 #ifndef OBUF_SIZE 58 #define OBUF_SIZE (8 * 1024) 59 #endif 60 61 const char* info = "" 62 "datauri [options...] [filenames...]\n" 63 "\n" 64 "\n" 65 "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n" 66 "few bytes from each data/file stream. When given multiple inputs, the output\n" 67 "will be multiple lines, one for each file given.\n" 68 "\n" 69 "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n" 70 "standard-input, which is also used automatically when not given any files.\n" 71 "\n" 72 "Data-URIs are base64-encoded text representations of arbitrary data, which\n" 73 "include their payload's MIME-type, and which are directly useable/shareable\n" 74 "in web-browsers as links, despite not looking like normal links/URIs.\n" 75 "\n" 76 "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n" 77 "\n" 78 "\n" 79 "Options\n" 80 "\n" 81 " -h, -help, --h, --help show this help message\n" 82 " -f, -fallback, --f, --fallback change the fallback MIME type\n" 83 ""; 84 85 const char* stdin_name = "<stdin>"; 86 87 #define octet "application/octet-stream" 88 89 const char* fallback_mime_type = octet; 90 91 // bufwriter is, as the name implies, a buffered-writer: when it's aimed at 92 // stdout, it considerably speeds up this app, as intended 93 typedef struct bufwriter { 94 // buf is the buffer proper 95 unsigned char* buf; 96 97 // len is how many bytes of the buffer are currently being used 98 size_t len; 99 100 // cap is the capacity of the buffer, or the most bytes it can hold 101 size_t cap; 102 103 // out is the destination of all that's written into the buffer 104 FILE* out; 105 } bufwriter; 106 107 void init_bufwriter(bufwriter* w, FILE* out, unsigned char* b, size_t cap) { 108 w->buf = b; 109 w->len = 0; 110 w->cap = cap; 111 w->out = out; 112 } 113 114 static inline void write_byte(bufwriter* w, unsigned char b) { 115 if (w->len < w->cap) { 116 w->buf[w->len++] = b; 117 return; 118 } 119 120 fwrite(w->buf, 1, w->cap, w->out); 121 w->buf[0] = b; 122 w->len = 1; 123 } 124 125 void write_string(bufwriter* w, const char* s) { 126 for (; *s != 0; s++) { 127 write_byte(w, *s); 128 } 129 } 130 131 void flush(bufwriter* w) { 132 if (w->len > 0) { 133 fwrite(w->buf, 1, w->len, w->out); 134 } 135 w->len = 0; 136 fflush(w->out); 137 } 138 139 // can be anything: ensure this value differs from all other literal bytes 140 // in the generic-headers table: failing that, its value could cause subtle 141 // type-misdetection bugs; the value is chosen to be `obviously` findable 142 // in the source, which also implies a constant beyond the ascii range, as 143 // ascii char-constants are also used in the tables 144 const unsigned char cba = 0xfd; // 253 145 146 #define aiff "audio/aiff" 147 #define au "audio/basic" 148 #define avi "video/avi" 149 #define avif "image/avif" 150 #define bmp "image/x-bmp" 151 #define caf "audio/x-caf" 152 #define cur "image/vnd.microsoft.icon" 153 #define css "text/css" 154 #define csv "text/csv" 155 #define djvu "image/x-djvu" 156 #define elf "application/x-elf" 157 #define exe "application/vnd.microsoft.portable-executable" 158 #define flac "audio/x-flac" 159 #define gif "image/gif" 160 #define gz "application/gzip" 161 #define heic "image/heic" 162 #define htm "text/html" 163 #define html "text/html" 164 #define ico "image/x-icon" 165 #define iso "application/octet-stream" 166 #define jpeg "image/jpeg" 167 #define js "application/javascript" 168 #define json "application/json" 169 #define m4a "audio/aac" 170 #define m4v "video/x-m4v" 171 #define midi "audio/midi" 172 #define mov "video/quicktime" 173 #define mp4 "video/mp4" 174 #define mp3 "audio/mpeg" 175 #define mpg "video/mpeg" 176 #define ogg "audio/ogg" 177 #define opus "audio/opus" 178 #define pdf "application/pdf" 179 #define png "image/png" 180 #define ps "application/postscript" 181 #define psd "image/vnd.adobe.photoshop" 182 #define rtf "application/rtf" 183 #define sqlite3 "application/x-sqlite3" 184 #define svg "image/svg+xml" 185 #define text "text/plain" 186 #define tiff "image/tiff" 187 #define tsv "text/tsv" 188 #define utf8 "text/plain; charset=UTF-8" 189 #define wasm "application/wasm" 190 #define wav "audio/x-wav" 191 #define webp "image/webp" 192 #define webm "video/webm" 193 #define xml "application/xml" 194 #define zip "application/zip" 195 #define zst "application/zstd" 196 197 // format_descriptor ties a file-header pattern to its data-format type; 198 // the 15-byte header-limit nicely aligns with the 1-byte length before it 199 typedef struct format_descriptor { 200 unsigned char header_length; 201 unsigned char header_bytes[15]; 202 const char* mime; 203 } format_descriptor; 204 205 // starts_as tries to match header data to the pattern given: this includes 206 // allowing `any byte` when the pattern indicates so, using a value reserved 207 // for that purpose 208 bool starts_as(const uint8_t* x, size_t xlen, const uint8_t* y, size_t ylen) { 209 // when header data aren't enough for a pattern, there's no match 210 if (xlen < ylen) { 211 return false; 212 } 213 214 for (size_t i = 0; i < ylen; i++) { 215 if (y[i] == cba) { 216 // `can be anything` value always matches 217 continue; 218 } 219 220 if (x[i] != y[i]) { 221 return false; 222 } 223 } 224 225 return true; 226 } 227 228 // not confident enough to actually use this, and replace all table entries 229 #define start_format_descriptor(...) \ 230 sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \ 231 { __VA_ARGS__ } 232 233 // format markers with leading wildcards, which should be checked before the 234 // normal ones: this is to prevent mismatches with the latter types, even 235 // though you can make probabilistic arguments which suggest these mismatches 236 // should be very unlikely in practice 237 format_descriptor special_headers[] = { 238 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 239 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 240 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 241 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 242 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 243 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 244 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 245 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 246 // { 247 // 24, 248 // { 249 // cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 250 // 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 251 // }, 252 // m4a, 253 // }, 254 {0}, 255 }; 256 257 format_descriptor hdr_dispatch_0[] = { 258 {4, {000, 000, 001, 0xBA}, mpg}, 259 {4, {000, 000, 001, 0xB3}, mpg}, 260 {4, {000, 000, 001, 000}, ico}, 261 {4, {000, 000, 002, 000}, cur}, 262 {4, {000, 'a', 's', 'm'}, wasm}, 263 {0}, 264 }; 265 266 format_descriptor hdr_dispatch_26[] = { 267 {4, {0x1A, 0x45, 0xDF, 0xA3}, webm}, 268 {0}, 269 }; 270 271 format_descriptor hdr_dispatch_31[] = { 272 // {4, {0x1F, 0x8B, 0x08, 0x08}, gz}, 273 {3, {0x1F, 0x8B, 0x08}, gz}, 274 {0}, 275 }; 276 277 format_descriptor hdr_dispatch_35[] = { 278 {3, "#! ", text}, 279 {3, "#!/", text}, 280 {0}, 281 }; 282 283 format_descriptor hdr_dispatch_37[] = { 284 {4, "%PDF", pdf}, 285 {4, "%!PS", ps}, 286 {0}, 287 }; 288 289 format_descriptor hdr_dispatch_40[] = { 290 {4, {0x28, 0xB5, 0x2F, 0xFD}, zst}, 291 {0}, 292 }; 293 294 format_descriptor hdr_dispatch_46[] = { 295 {4, ".snd", au}, 296 {0}, 297 }; 298 299 format_descriptor hdr_dispatch_56[] = { 300 {4, "8BPS", psd}, 301 {0}, 302 }; 303 304 format_descriptor hdr_dispatch_60[] = { 305 {14, "<!DOCTYPE html", html}, 306 {4, "<svg", svg}, 307 {5, "<html", html}, 308 {5, "<head", html}, 309 {5, "<body", html}, 310 {5, "<?xml", xml}, 311 {0}, 312 }; 313 314 format_descriptor hdr_dispatch_65[] = { 315 { 316 15, 317 { 318 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', 319 cba, cba, cba, cba, 'D', 'J', 'V', 320 }, 321 djvu, 322 }, 323 {0}, 324 }; 325 326 format_descriptor hdr_dispatch_66[] = { 327 { 328 15, 329 { 330 'B', 'M', cba, cba, cba, cba, cba, cba, 331 cba, cba, cba, cba, cba, cba, 0x28, 332 }, 333 bmp, 334 }, 335 {0}, 336 }; 337 338 format_descriptor hdr_dispatch_70[] = { 339 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 340 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 341 {0}, 342 }; 343 344 format_descriptor hdr_dispatch_71[] = { 345 {6, "GIF87a", gif}, 346 {6, "GIF89a", gif}, 347 {0}, 348 }; 349 350 format_descriptor hdr_dispatch_73[] = { 351 {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata 352 {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata 353 {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata 354 {4, {'I', 'I', '*', 000}, tiff}, 355 {0}, 356 }; 357 358 format_descriptor hdr_dispatch_77[] = { 359 {4, {'M', 'M', 000, '*'}, tiff}, 360 {4, "MThd", midi}, 361 {6, {'M', 'Z', cba, 000, cba, 000}, exe}, 362 // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe}, 363 // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe}, 364 // {6, {'M', 'Z', 'P', 000, 002, 000}, exe}, 365 {0}, 366 }; 367 368 format_descriptor hdr_dispatch_79[] = { 369 {4, "OggS", ogg}, 370 {0}, 371 }; 372 373 format_descriptor hdr_dispatch_80[] = { 374 {4, {'P', 'K', 003, 004}, zip}, 375 {0}, 376 }; 377 378 format_descriptor hdr_dispatch_82[] = { 379 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 380 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 381 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 382 {0}, 383 }; 384 385 format_descriptor hdr_dispatch_83[] = { 386 // {16, "SQLite format 3\x00", sqlite3}, 387 {0}, 388 }; 389 390 format_descriptor hdr_dispatch_99[] = { 391 {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 392 {0}, 393 }; 394 395 format_descriptor hdr_dispatch_102[] = { 396 {4, "fLaC", flac}, 397 {0}, 398 }; 399 400 format_descriptor hdr_dispatch_123[] = { 401 {4, "{\\rtf", rtf}, 402 {0}, 403 }; 404 405 format_descriptor hdr_dispatch_127[] = { 406 {4, {127, 'E', 'L', 'F'}, elf}, 407 {0}, 408 }; 409 410 format_descriptor hdr_dispatch_137[] = { 411 {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 412 {0}, 413 }; 414 415 format_descriptor hdr_dispatch_255[] = { 416 {3, {0xFF, 0xD8, 0xFF}, jpeg}, 417 {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 418 {2, {0xFF, 0xFB}, mp3}, 419 {0}, 420 }; 421 422 // hdr_dispatch groups format-description-groups by their first byte, thus 423 // shortening total lookups for some data header 424 // 425 // notice how the `ftyp` data formats aren't handled here, since these can 426 // start with any byte, instead of the literal value of the any-byte markers 427 // they use 428 // 429 // all entries are arrays which must always end with a special entry whose 430 // pattern-length is declared to be 0, since there's no explicit way to know 431 // the length of these arrays when looping on them 432 // 433 // all non-null entries are setup explicitly, later in the code 434 format_descriptor* hdr_dispatch[256] = { 435 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 436 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 437 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 438 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 439 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 440 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 441 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 442 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 443 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 444 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 445 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 446 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 447 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 448 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 449 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 450 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 451 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 452 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 453 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 454 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 455 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 456 NULL, NULL, NULL, NULL, 457 }; 458 459 // guess_mime tries to auto-detect a MIME-type from the header bytes given, 460 // using the lookup-tables 461 const char* guess_mime(const unsigned char* buf, size_t len) { 462 if (len == 0) { 463 return NULL; 464 } 465 466 // just in case, start with the patterns which allow any first byte 467 for (size_t i = 0; special_headers[i].header_length > 0; i++) { 468 const unsigned char* hb = special_headers[i].header_bytes; 469 const size_t hl = special_headers[i].header_length; 470 if (starts_as(buf, len, hb, hl)) { 471 return special_headers[i].mime; 472 } 473 } 474 475 // the m4a-dash header exceeds the 15-byte limit of the lookup tables 476 const uint8_t header1[24] = { 477 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 478 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 479 }; 480 if (starts_as(buf, len, header1, sizeof(header1))) { 481 return m4a; 482 } 483 484 // the sqlite3 header exceeds the 15-byte limit of the lookup tables 485 const uint8_t header2[16] = "SQLite format 3\x00"; 486 if (starts_as(buf, len, header2, sizeof(header2))) { 487 return sqlite3; 488 } 489 490 format_descriptor* guesses = hdr_dispatch[buf[0]]; 491 if (guesses == NULL) { 492 return fallback_mime_type; 493 } 494 495 for (size_t i = 0; guesses[i].header_length > 0; i++) { 496 const unsigned char* hb = guesses[i].header_bytes; 497 const size_t hl = guesses[i].header_length; 498 if (starts_as(buf, len, hb, hl)) { 499 return guesses[i].mime; 500 } 501 } 502 503 return fallback_mime_type; 504 } 505 506 bool is_mime_disabled(const char* mime) { 507 return (mime != NULL) && (mime[0] == 'n') && ( 508 strcmp(mime, "no") == 0 || 509 strcmp(mime, "nomime") == 0 || 510 strcmp(mime, "no-mime") == 0 || 511 strcmp(mime, "none") == 0 || 512 strcmp(mime, "not") == 0 513 ); 514 } 515 516 // start_data_uri starts the output by declaring the data-URI to be an 517 // auto-detected MIME-type; the return value is the auto-detection success 518 bool start_data_uri(bufwriter* w, const unsigned char* buf, size_t len) { 519 const char* mime = guess_mime(buf, len); 520 if (is_mime_disabled(mime)) { 521 return true; 522 } 523 if (mime == NULL || mime[0] == 0) { 524 return false; 525 } 526 527 write_string(w, "data:"); 528 for (size_t i = 0; mime[i] != 0; i++) { 529 write_byte(w, mime[i]); 530 } 531 write_string(w, ";base64,"); 532 return true; 533 } 534 535 const unsigned char base64_lookup[64] = 536 "" 537 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 538 ""; 539 540 static inline uint32_t combine_triple(const unsigned char data[4]) { 541 return (data[0] << 16) | (data[1] << 8) | (data[2] << 0); 542 } 543 544 static inline void emit_triple(bufwriter* w, uint32_t v) { 545 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 546 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 547 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 548 write_byte(w, base64_lookup[0x3f & (v >> 0)]); 549 } 550 551 void emit_couple(bufwriter* w, uint32_t v) { 552 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 553 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 554 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 555 write_byte(w, '='); 556 } 557 558 void emit_single(bufwriter* w, uint32_t v) { 559 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 560 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 561 write_byte(w, '='); 562 write_byte(w, '='); 563 } 564 565 bool handle_reader(bufwriter* w, FILE* src, const char* path) { 566 unsigned char buf[IBUF_SIZE]; 567 uint64_t bytes = 0; 568 569 // triple holds groups of 3 bytes at once, which is required by base64, 570 // except for the last few bytes of input, which are padded with equals; 571 // the 4th item is never used, but having it aligns things to 32 bits 572 unsigned char triple[4]; 573 triple[0] = 0; 574 triple[1] = 0; 575 triple[2] = 0; 576 triple[3] = 0; 577 578 while (!feof(w->out)) { 579 const size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), src); 580 if (n < 1) { 581 // assume input is over when no bytes were read 582 break; 583 } 584 585 if (bytes == 0 && !start_data_uri(w, buf, n)) { 586 write_byte(w, '\n'); 587 flush(w); 588 const char* msg = "can't auto-detect MIME type for"; 589 fprintf(stderr, ERROR_LINE("%s %s"), msg, path); 590 return false; 591 } 592 593 size_t where = bytes % 3; 594 for (size_t i = 0; i < n; i++, bytes++) { 595 triple[where++] = buf[i]; 596 if (where == 3) { 597 emit_triple(w, combine_triple(triple)); 598 where = 0; 599 } 600 } 601 } 602 603 // empty inputs result in empty outputs 604 if (bytes == 0) { 605 return true; 606 } 607 608 // don't forget unemitted trailing bytes, if any: these need special 609 // handling, as they include `=` signs; if the input bytes were a 610 // multiple of 3, there won't be any trailing bytes 611 switch (bytes % 3) { 612 case 1: 613 triple[1] = 0; 614 triple[2] = 0; 615 emit_single(w, combine_triple(triple)); 616 break; 617 case 2: 618 triple[2] = 0; 619 emit_couple(w, combine_triple(triple)); 620 break; 621 } 622 623 // end with a line-feed, so multiple input streams are each encoded in 624 // their own line 625 if (bytes > 0) { 626 write_byte(w, '\n'); 627 flush(w); 628 } 629 return true; 630 } 631 632 // handle_file handles data from the filename given; returns false only when 633 // an error happened 634 bool handle_file(bufwriter* w, const char* path) { 635 // a `-` filename stands for the standard input 636 if (path[0] == '-' && path[1] == 0) { 637 return handle_reader(w, stdin, stdin_name); 638 } 639 640 FILE* f = fopen(path, "rb"); 641 if (f == NULL) { 642 fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path); 643 return false; 644 } 645 646 const bool ok = handle_reader(w, f, path); 647 fclose(f); 648 return ok; 649 } 650 651 // is_help_option simplifies control-flow for func run 652 bool is_help_option(const char* s) { 653 return s[0] == '-' && ( 654 strcmp(s, "-h") == 0 || 655 strcmp(s, "-help") == 0 || 656 strcmp(s, "--h") == 0 || 657 strcmp(s, "--help") == 0 658 ); 659 } 660 661 // is_fallback_option simplifies control-flow for func run 662 bool is_fallback_option(const char* s) { 663 return s[0] == '-' && ( 664 strcmp(s, "-f") == 0 || 665 strcmp(s, "-fallback") == 0 || 666 strcmp(s, "--f") == 0 || 667 strcmp(s, "--fallback") == 0 668 ); 669 } 670 671 const char* fallback_aliases[192] = { 672 // tiny shortcuts 673 "b", octet, 674 "j", json, 675 "t", text, 676 "u", utf8, 677 678 // failure fallbacks 679 "e", "", 680 "err", "", 681 "error", "", 682 "f", "", 683 "fail", "", 684 685 // common mistakes 686 "text/json", json, 687 688 "aif", aiff, 689 "aiff", aiff, 690 "au", au, 691 "avi", avi, 692 "avif", avif, 693 "bmp", bmp, 694 "caf", caf, 695 "cur", cur, 696 "css", css, 697 "csv", csv, 698 "djvu", djvu, 699 "elf", elf, 700 "exe", exe, 701 "flac", flac, 702 "gif", gif, 703 "gz", gz, 704 "heic", heic, 705 "html", html, 706 "ico", ico, 707 "iso", iso, 708 "jpg", jpeg, 709 "jpeg", jpeg, 710 "js", js, 711 "json", json, 712 "m4a", m4a, 713 "m4v", m4v, 714 "midi", midi, 715 "mov", mov, 716 "mp4", mp4, 717 "mp3", mp3, 718 "mpeg", mpg, 719 "ogg", ogg, 720 "opus", opus, 721 "pdf", pdf, 722 "png", png, 723 "ps", ps, 724 "psd", psd, 725 "rtf", rtf, 726 "sqlite3", sqlite3, 727 "svg", svg, 728 "text", text, 729 "tiff", tiff, 730 "tsv", tsv, 731 "wasm", wasm, 732 "wav", wav, 733 "webp", webp, 734 "webm", webm, 735 "xml", xml, 736 "zip", zip, 737 "zst", zst, 738 739 // longer shortcuts 740 "aac", m4a, 741 "aif", aiff, 742 "bin", octet, 743 "binary", octet, 744 "bits", octet, 745 "gzip", gz, 746 "htm", htm, 747 "mid", midi, 748 "mpg", mpg, 749 "octet", octet, 750 "octets", octet, 751 "octetstream", octet, 752 "octet-stream", octet, 753 "plain", text, 754 "sqlite", sqlite3, 755 "svg+xml", svg, 756 "tif", tiff, 757 "utf8", utf8, 758 "utf-8", utf8, 759 "xbmp", bmp, 760 "xcaf", caf, 761 "xflac", flac, 762 "xicon", ico, 763 "xm4v", m4v, 764 "xsqlite3", sqlite3, 765 "xwav", wav, 766 "xwave", wav, 767 "x-bmp", bmp, 768 "x-caf", caf, 769 "x-flac", flac, 770 "x-icon", ico, 771 "x-m4v", m4v, 772 "x-sqlite3", sqlite3, 773 "x-wav", wav, 774 "wave", wav, 775 "zstd", zst, 776 }; 777 778 const char* resolve_alias(const char* name) { 779 const size_t n = sizeof(fallback_aliases) / sizeof(fallback_aliases[0]); 780 for (size_t i = 0; i < n; i += 2) { 781 if (strcmp(name, fallback_aliases[i]) == 0) { 782 return fallback_aliases[i + 1]; 783 } 784 } 785 return name; 786 } 787 788 // run returns the number of errors 789 int run(int argc, char** argv, FILE* w) { 790 unsigned char outbuf[OBUF_SIZE]; 791 bufwriter bw; 792 init_bufwriter(&bw, w, outbuf, sizeof(outbuf)); 793 794 size_t files = 0; 795 size_t errors = 0; 796 bool change_fallback = false; 797 798 // handle all filenames given 799 for (size_t i = 1; i < argc && !feof(w); i++) { 800 if (change_fallback) { 801 fallback_mime_type = resolve_alias(argv[i]); 802 change_fallback = false; 803 continue; 804 } 805 806 if (is_fallback_option(argv[i])) { 807 change_fallback = true; 808 continue; 809 } 810 811 if (!handle_file(&bw, argv[i])) { 812 errors++; 813 } 814 files++; 815 } 816 817 if (change_fallback) { 818 flush(&bw); 819 fprintf(stderr, ERROR_LINE("forgot new fallback MIME-type")); 820 errors++; 821 return errors; 822 } 823 824 // no filenames means use stdin as the only input 825 if (files == 0) { 826 if (!handle_reader(&bw, stdin, stdin_name)) { 827 errors++; 828 } 829 } 830 831 flush(&bw); 832 return errors; 833 } 834 835 int main(int argc, char** argv) { 836 #ifdef _WIN32 837 setmode(fileno(stdin), O_BINARY); 838 // ensure output lines end in LF instead of CRLF on windows 839 setmode(fileno(stdout), O_BINARY); 840 setmode(fileno(stderr), O_BINARY); 841 #endif 842 843 if (argc > 1 && is_help_option(argv[1])) { 844 printf("%s", info); 845 return 0; 846 } 847 848 // fill entries in the type-detection dispatch table 849 hdr_dispatch[0] = hdr_dispatch_0; // 0 850 hdr_dispatch[26] = hdr_dispatch_26; // 26 851 hdr_dispatch[31] = hdr_dispatch_31; // 31 852 hdr_dispatch[35] = hdr_dispatch_35; // 35 # 853 hdr_dispatch[37] = hdr_dispatch_37; // 37 % 854 hdr_dispatch[40] = hdr_dispatch_40; // 40 ( 855 hdr_dispatch[46] = hdr_dispatch_46; // 46 . 856 hdr_dispatch[56] = hdr_dispatch_56; // 56 8 857 hdr_dispatch[60] = hdr_dispatch_60; // 60 < 858 hdr_dispatch[65] = hdr_dispatch_65; // 65 A 859 hdr_dispatch[66] = hdr_dispatch_66; // 66 B 860 hdr_dispatch[70] = hdr_dispatch_70; // 70 F 861 hdr_dispatch[71] = hdr_dispatch_71; // 71 G 862 hdr_dispatch[73] = hdr_dispatch_73; // 73 I 863 hdr_dispatch[77] = hdr_dispatch_77; // 77 M 864 hdr_dispatch[79] = hdr_dispatch_79; // 79 O 865 hdr_dispatch[80] = hdr_dispatch_80; // 80 P 866 hdr_dispatch[82] = hdr_dispatch_82; // 82 R 867 hdr_dispatch[83] = hdr_dispatch_83; // 83 S 868 hdr_dispatch[99] = hdr_dispatch_99; // 99 c 869 hdr_dispatch[102] = hdr_dispatch_102; // 102 f 870 hdr_dispatch[123] = hdr_dispatch_123; // 123 { 871 hdr_dispatch[127] = hdr_dispatch_127; // 127 872 hdr_dispatch[137] = hdr_dispatch_137; // 137 873 hdr_dispatch[255] = hdr_dispatch_255; // 255 874 875 return run(argc, argv, stdout) == 0 ? 0 : 1; 876 }