/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
You can build this command-line app by running

cc -Wall -s -O2 -o ./datauri ./datauri.c
*/

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
// io.h is the header which declares setmode, used when the app starts
#include <io.h>
#include <windows.h>
#endif

// info is the help message shown by the help options
const char* info =
    ""
    "datauri [options...] [filenames...]\n"
    "\n"
    "\n"
    "Encode bytes as data-URIs, auto-detecting the file/data type using the first\n"
    "few bytes from each data/file stream. When given multiple inputs, the output\n"
    "will be multiple lines, one for each file given.\n"
    "\n"
    "Empty files/inputs result in empty lines. A simple dash (-) stands for the\n"
    "standard-input, which is also used automatically when not given any files.\n"
    "\n"
    "Data-URIs are base64-encoded text representations of arbitrary data, which\n"
    "include their payload's MIME-type, and which are directly useable/shareable\n"
    "in web-browsers as links, despite not looking like normal links/URIs.\n"
    "\n"
    "Some web-browsers limit the size of handled data-URIs to tens of kilobytes.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, -help, --h, --help show this help message\n"
    " -f, -fallback, --f, --fallback change the fallback MIME type\n"
    "";

// stdin_name is the name used for the standard input in error messages
const char* stdin_name = "<stdin>";

// fallback_mime_type is the MIME type used when auto-detection finds no
// match; it can be changed via the fallback cmd-line option
const char* fallback_mime_type = "application/octet-stream";

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// init_bufwriter is the constructor for type bufwriter: the buffer given is
// borrowed, not copied, and must stay valid while the writer is in use
void init_bufwriter(bufwriter* w, FILE* dst, unsigned char* buf, size_t cap) {
    w->buf = buf;
    w->len = 0;
    w->cap = cap;
    w->out = dst;
    w->done = false;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (w->len > 0 && fwrite(w->buf, w->len, 1, w->out) < 1) {
        w->done = true;
    }
    w->len = 0;
}

// write_bytes does as it says, minimizing the number of calls to fwrite
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->len + len < w->cap) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity, bypassing
    // the buffer, which is empty at this point
    for (; len >= w->cap; src += w->cap, len -= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
            return;
        }
    }

    // now all, if any, remaining bytes will fit into the buffer
    memcpy(w->buf, src, len);
    w->len += len;
}

// write_byte does as it says, flushing first when the buffer is full
void write_byte(bufwriter* w, unsigned char b) {
    if (w->len >= w->cap) {
        flush(w);
    }
    w->buf[w->len] = b;
    w->len++;
}

// EMIT_CONST abstracts a common use-case of the bufwriter, which is
// emitting string constants without their final null byte; the argument
// must be a string literal/array, since its size comes from sizeof
#define EMIT_CONST(w, x) \
    write_bytes((w), (const unsigned char*)(x), sizeof(x) - 1)

// can be anything: ensure this value differs from all other literal bytes
// in the generic-headers table: failing that, its value could cause subtle
// type-misdetection bugs; the value is chosen to be `obviously` findable
// in the source, which also implies a constant beyond the ascii range, as
// ascii char-constants are also used in the tables
//
// the value used to be 0xfd, which is also the last byte of the zstd
// signature in the tables, turning that byte into an accidental wildcard;
// 0xfe isn't a literal byte in any of the header patterns
//
// it's an enum constant, instead of a const variable, because the tables
// are static initializers, which standard C only allows to use constant
// expressions
enum { cba = 0xfe }; // 254

#define aiff "audio/aiff"
#define au "audio/basic"
#define avi "video/avi"
#define avif "image/avif"
#define bmp "image/x-bmp"
#define caf "audio/x-caf"
#define cur "image/vnd.microsoft.icon"
#define css "text/css"
#define csv "text/csv"
#define djvu "image/x-djvu"
#define elf "application/x-elf"
#define exe "application/vnd.microsoft.portable-executable"
#define flac "audio/x-flac"
#define gif "image/gif"
#define gz "application/gzip"
#define heic "image/heic"
#define htm "text/html"
#define html "text/html"
#define ico "image/x-icon"
#define iso "application/octet-stream" 173 #define jpg "image/jpeg" 174 #define jpeg "image/jpeg" 175 #define js "application/javascript" 176 #define json "application/json" 177 #define m4a "audio/aac" 178 #define m4v "video/x-m4v" 179 #define mid "audio/midi" 180 #define mov "video/quicktime" 181 #define mp4 "video/mp4" 182 #define mp3 "audio/mpeg" 183 #define mpg "video/mpeg" 184 #define ogg "audio/ogg" 185 #define opus "audio/opus" 186 #define pdf "application/pdf" 187 #define png "image/png" 188 #define ps "application/postscript" 189 #define psd "image/vnd.adobe.photoshop" 190 #define rtf "application/rtf" 191 #define sqlite3 "application/x-sqlite3" 192 #define svg "image/svg+xml" 193 #define text "text/plain" 194 #define tiff "image/tiff" 195 #define tsv "text/tsv" 196 #define wasm "application/wasm" 197 #define wav "audio/x-wav" 198 #define webp "image/webp" 199 #define webm "video/webm" 200 #define xml "application/xml" 201 #define zip "application/zip" 202 #define zst "application/zstd" 203 204 // format_descriptor ties a file-header pattern to its data-format type 205 typedef struct format_descriptor { 206 unsigned char header_length; 207 unsigned char header_bytes[24]; 208 const char* mime; 209 } format_descriptor; 210 211 // starts_as tries to match header data to the pattern given: this includes 212 // allowing `any byte` when the pattern indicates so, using a value reserved 213 // for that purpose 214 bool starts_as(unsigned char* x, size_t xlen, unsigned char* y, size_t ylen) { 215 // when header data aren't enough for a pattern, there's no match 216 if (xlen < ylen) { 217 return false; 218 } 219 220 for (size_t i = 0; i < xlen; i++) { 221 if (y[i] == cba) { 222 // `can be anything` value always matches 223 continue; 224 } 225 226 if (x[i] != y[i]) { 227 return false; 228 } 229 } 230 231 return true; 232 } 233 234 // wrapper func to make func `starts_as` harder to miscall 235 inline bool match_header(unsigned char* d, size_t len, format_descriptor* 
to) { 236 return starts_as(d, len, to->header_bytes, to->header_length); 237 } 238 239 // not confident enough to actually use this, and replace all table entries 240 #define start_format_descriptor(...) \ 241 sizeof((unsigned char[]){ __VA_ARGS__ }) / sizeof(unsigned char), \ 242 { __VA_ARGS__ } 243 244 // format markers with leading wildcards, which should be checked before the 245 // normal ones: this is to prevent mismatches with the latter types, even 246 // though you can make probabilistic arguments which suggest these mismatches 247 // should be very unlikely in practice 248 format_descriptor special_headers[] = { 249 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 250 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 251 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 252 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 253 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 254 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 255 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 256 {12, {cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 257 { 258 24, 259 { 260 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 261 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 262 }, 263 m4a, 264 }, 265 {0}, 266 }; 267 268 format_descriptor hdr_dispatch_0[] = { 269 {4, {000, 000, 001, 0xBA}, mpg}, 270 {4, {000, 000, 001, 0xB3}, mpg}, 271 {4, {000, 000, 001, 000}, ico}, 272 {4, {000, 000, 002, 000}, cur}, 273 {4, {000, 'a', 's', 'm'}, wasm}, 274 {0}, 275 }; 276 277 format_descriptor hdr_dispatch_26[] = { 278 {4, {0x1A, 0x45, 0xDF, 0xA3}, webm}, 279 {0}, 280 }; 281 282 format_descriptor hdr_dispatch_31[] = { 283 // {4, {0x1F, 0x8B, 0x08, 0x08}, gz}, 284 {3, {0x1F, 0x8B, 0x08}, gz}, 285 {0}, 286 }; 287 288 format_descriptor hdr_dispatch_35[] = { 289 {3, "#! 
", text}, 290 {3, "#!/", text}, 291 {0}, 292 }; 293 294 format_descriptor hdr_dispatch_37[] = { 295 {4, "%PDF", pdf}, 296 {4, "%!PS", ps}, 297 {0}, 298 }; 299 300 format_descriptor hdr_dispatch_40[] = { 301 {4, {0x28, 0xB5, 0x2F, 0xFD}, zst}, 302 {0}, 303 }; 304 305 format_descriptor hdr_dispatch_46[] = { 306 {4, ".snd", au}, 307 {0}, 308 }; 309 310 format_descriptor hdr_dispatch_56[] = { 311 {4, "8BPS", psd}, 312 {0}, 313 }; 314 315 format_descriptor hdr_dispatch_60[] = { 316 {14, "<!DOCTYPE html", html}, 317 {4, "<svg", svg}, 318 {5, "<html", html}, 319 {5, "<head", html}, 320 {5, "<body", html}, 321 {5, "<?xml", xml}, 322 {0}, 323 }; 324 325 format_descriptor hdr_dispatch_65[] = { 326 { 327 15, 328 { 329 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', 330 cba, cba, cba, cba, 'D', 'J', 'V', 331 }, 332 djvu, 333 }, 334 {0}, 335 }; 336 337 format_descriptor hdr_dispatch_66[] = { 338 { 339 15, 340 { 341 'B', 'M', cba, cba, cba, cba, cba, cba, 342 cba, cba, cba, cba, cba, cba, 0x28, 343 }, 344 bmp, 345 }, 346 {0}, 347 }; 348 349 format_descriptor hdr_dispatch_70[] = { 350 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 351 {12, {'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 352 {0}, 353 }; 354 355 format_descriptor hdr_dispatch_71[] = { 356 {6, "GIF87a", gif}, 357 {6, "GIF89a", gif}, 358 {0}, 359 }; 360 361 format_descriptor hdr_dispatch_73[] = { 362 {4, {'I', 'D', '3', 2}, mp3}, // ID3-format metadata 363 {4, {'I', 'D', '3', 3}, mp3}, // ID3-format metadata 364 {4, {'I', 'D', '3', 4}, mp3}, // ID3-format metadata 365 {4, {'I', 'I', '*', 000}, tiff}, 366 {0}, 367 }; 368 369 format_descriptor hdr_dispatch_77[] = { 370 {4, {'M', 'M', 000, '*'}, tiff}, 371 {4, "MThd", mid}, 372 {6, {'M', 'Z', cba, 000, cba, 000}, exe}, 373 // {6, {'M', 'Z', 0x90, 000, 003, 000}, exe}, 374 // {6, {'M', 'Z', 0x78, 000, 001, 000}, exe}, 375 // {6, {'M', 'Z', 'P', 000, 002, 000}, exe}, 376 {0}, 377 }; 378 379 format_descriptor hdr_dispatch_79[] = { 
380 {4, "OggS", ogg}, 381 {0}, 382 }; 383 384 format_descriptor hdr_dispatch_80[] = { 385 {4, {'P', 'K', 003, 004}, zip}, 386 {0}, 387 }; 388 389 format_descriptor hdr_dispatch_82[] = { 390 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 391 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 392 {12, {'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 393 {0}, 394 }; 395 396 format_descriptor hdr_dispatch_83[] = { 397 {16, "SQLite format 3\x00", sqlite3}, 398 {0}, 399 }; 400 401 format_descriptor hdr_dispatch_99[] = { 402 {8, {'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 403 {0}, 404 }; 405 406 format_descriptor hdr_dispatch_102[] = { 407 {4, "fLaC", flac}, 408 {0}, 409 }; 410 411 format_descriptor hdr_dispatch_123[] = { 412 {4, "{\\rtf", rtf}, 413 {0}, 414 }; 415 416 format_descriptor hdr_dispatch_127[] = { 417 {4, {127, 'E', 'L', 'F'}, elf}, 418 {0}, 419 }; 420 421 format_descriptor hdr_dispatch_137[] = { 422 {8, {0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 423 {0}, 424 }; 425 426 format_descriptor hdr_dispatch_255[] = { 427 {3, {0xFF, 0xD8, 0xFF}, jpg}, 428 {5, {0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 429 {2, {0xFF, 0xFB}, mp3}, 430 {0}, 431 }; 432 433 // hdr_dispatch groups format-description-groups by their first byte, thus 434 // shortening total lookups for some data header 435 // 436 // notice how the `ftyp` data formats aren't handled here, since these can 437 // start with any byte, instead of the literal value of the any-byte markers 438 // they use 439 // 440 // all entries are arrays which must always end with a special entry whose 441 // pattern-length is declared to be 0, since there's no explicit way to know 442 // the length of these arrays when looping on them 443 // 444 // all non-null entries are setup explicitly, later in the code 445 format_descriptor* hdr_dispatch[256] = { 446 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 447 NULL, NULL, NULL, 
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 448 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 449 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 450 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 451 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 452 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 453 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 454 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 455 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 456 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 457 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 458 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 459 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 460 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 461 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 462 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 463 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 464 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 465 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 466 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 467 NULL, NULL, NULL, NULL, 468 }; 469 470 // guess_mime tries to auto-detect a MIME-type from the header bytes given, 471 // using the lookup-tables 472 const char* guess_mime(unsigned char* buf, size_t len) { 473 if (len == 0) { 474 return NULL; 475 } 476 477 // try the patterns which allow any bytes at the very start 478 for (size_t i = 0; special_headers[i].header_length > 0; i++) { 479 if (match_header(buf, len, &special_headers[i])) { 480 return special_headers[i].mime; 481 } 482 } 483 484 
format_descriptor* guesses = hdr_dispatch[buf[0]]; 485 if (guesses == NULL) { 486 return fallback_mime_type; 487 } 488 489 for (size_t i = 0; guesses[i].header_length > 0; i++) { 490 if (match_header(buf, len, &guesses[i])) { 491 return guesses[i].mime; 492 } 493 } 494 return fallback_mime_type; 495 } 496 497 bool is_mime_disabled(const char* mime) { 498 return (mime[0] == 'n') && ( 499 strcmp(mime, "no") == 0 || 500 strcmp(mime, "nomime") == 0 || strcmp(mime, "no-mime") == 0 || 501 strcmp(mime, "none") == 0 || strcmp(mime, "not") 502 ); 503 } 504 505 // start_data_uri starts the output by declaring the data-URI to be an 506 // auto-detected MIME-type; the return value is the auto-detection success 507 bool start_data_uri(bufwriter* w, unsigned char* buf, size_t len) { 508 const char* mime = guess_mime(buf, len); 509 if (is_mime_disabled(mime)) { 510 return true; 511 } 512 if (mime == NULL || mime[0] == 0) { 513 return false; 514 } 515 516 EMIT_CONST(w, "data:"); 517 for (size_t i = 0; mime[i] != 0; i++) { 518 write_byte(w, mime[i]); 519 } 520 EMIT_CONST(w, ";base64,"); 521 return true; 522 } 523 524 const unsigned char base64_lookup[] = 525 "" 526 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 527 ""; 528 529 inline uint32_t combine_triple(unsigned char data[4]) { 530 return (data[0] << 16) | (data[1] << 8) | data[2]; 531 } 532 533 inline void emit_triple(bufwriter* w, uint32_t v) { 534 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 535 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 536 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 537 write_byte(w, base64_lookup[0x3f & v]); 538 } 539 540 inline void emit_couple(bufwriter* w, uint32_t v) { 541 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 542 write_byte(w, base64_lookup[0x3f & (v >> 12)]); 543 write_byte(w, base64_lookup[0x3f & (v >> 6)]); 544 write_byte(w, '='); 545 } 546 547 inline void emit_single(bufwriter* w, uint32_t v) { 548 write_byte(w, base64_lookup[0x3f & (v >> 18)]); 549 
write_byte(w, base64_lookup[0x3f & (v >> 12)]); 550 write_byte(w, '='); 551 write_byte(w, '='); 552 } 553 554 bool handle_reader(bufwriter* w, FILE* src, const char* path) { 555 // size of the input-buffer must be a multiple of 3 556 unsigned char buf[48 * 1024]; 557 size_t chunks = 0; 558 size_t where = 0; 559 unsigned char triple[4]; 560 561 triple[0] = 0; 562 triple[1] = 0; 563 triple[2] = 0; 564 triple[3] = 0; 565 566 while (!w->done) { 567 const size_t n = fread(&buf, sizeof(unsigned char), sizeof(buf), src); 568 if (n < 1) { 569 // assume input is over when no bytes were read 570 break; 571 } 572 573 if (chunks == 0) { 574 char* fmt = "\x1b[31mcan't auto-detect MIME type for %s\x1b[0m\n"; 575 if (!start_data_uri(w, buf, n)) { 576 write_byte(w, '\n'); 577 flush(w); 578 fprintf(stderr, fmt, path); 579 return false; 580 } 581 } 582 chunks++; 583 584 for (size_t i = 0; i < n; i++) { 585 triple[where] = buf[i]; 586 if (where < 2) { 587 where++; 588 } else { 589 where = 0; 590 emit_triple(w, combine_triple(triple)); 591 } 592 } 593 } 594 595 // empty inputs result in empty outputs 596 if (chunks == 0) { 597 return true; 598 } 599 600 // don't forget unemitted trailing bytes, if any: these need special 601 // handling, as they include `=` signs; if the input bytes were a 602 // multiple of 3, there won't be any trailing bytes 603 switch (where) { 604 case 1: 605 triple[1] = 0; 606 triple[2] = 0; 607 emit_single(w, combine_triple(triple)); 608 break; 609 case 2: 610 triple[2] = 0; 611 emit_couple(w, combine_triple(triple)); 612 break; 613 } 614 615 // end with a line-feed, so multiple input streams are each encoded in 616 // their own line 617 if (chunks > 0) { 618 write_byte(w, '\n'); 619 } 620 flush(w); 621 return true; 622 } 623 624 // handle_file handles data from the filename given; returns false only when 625 // an error happened 626 bool handle_file(bufwriter* w, const char* path) { 627 // a `-` filename stands for the standard input 628 if (path[0] == '-' && 
path[1] == 0) { 629 return handle_reader(w, stdin, stdin_name); 630 } 631 632 FILE* f = fopen(path, "rb"); 633 if (f == NULL) { 634 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path); 635 return false; 636 } 637 638 const bool ok = handle_reader(w, f, path); 639 fclose(f); 640 return ok; 641 } 642 643 // is_help_option simplifies control-flow for func run 644 bool is_help_option(char* s) { 645 return s[0] == '-' && ( 646 strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 || 647 strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0 648 ); 649 } 650 651 // is_fallback_option simplifies control-flow for func run 652 bool is_fallback_option(char* s) { 653 return s[0] == '-' && ( 654 strcmp(s, "-f") == 0 || strcmp(s, "-fallback") == 0 || 655 strcmp(s, "--f") == 0 || strcmp(s, "--fallback") == 0 656 ); 657 } 658 659 const char* fallback_aliases[] = { 660 // "text/json", "application/json", 661 662 // "xbmp", "image/x-bmp", 663 // "xflac", "audio/x-flac", 664 // "xicon", "image/x-icon", 665 // "xm4v", "video/x-m4v", 666 // "xsqlite3", "application/x-sqlite3", 667 // "xwav", "audio/x-wav", 668 // "xwave", "audio/x-wav", 669 // "x-bmp", "image/x-bmp", 670 // "x-flac", "audio/x-flac", 671 // "x-icon", "image/x-icon", 672 // "x-m4v", "video/x-m4v", 673 // "x-sqlite3", "application/x-sqlite3", 674 // "x-wav", "audio/x-wav", 675 676 "b", "application/octet-stream", 677 "j", "application/json", 678 "t", "text/plain", 679 "u", "text/plain; charset=UTF-8", 680 681 "e", "", 682 "err", "", 683 "error", "", 684 "f", "", 685 "fail", "", 686 687 "aac", "audio/aac", 688 "aif", "audio/aiff", 689 "bin", "application/octet-stream", 690 "binary", "application/octet-stream", 691 "gzip", "application/gzip", 692 "midi", "audio/midi", 693 "mpeg", "video/mpeg", 694 "octet", "application/octet-stream", 695 "octetstream", "application/octet-stream", 696 "octet-stream", "application/octet-stream", 697 "plain", "text/plain", 698 "sqlite", "application/x-sqlite3", 699 "svg+xml", 
"image/svg+xml", 700 "tif", "image/tiff", 701 "utf8", "text/plain; charset=UTF-8", 702 "utf-8", "text/plain; charset=UTF-8", 703 "wave", "audio/x-wav", 704 "zstd", "application/zstd", 705 706 "aiff", "audio/aiff", 707 "au", "audio/basic", 708 "avi", "video/avi", 709 "avif", "image/avif", 710 "bmp", "image/x-bmp", 711 "caf", "audio/x-caf", 712 "cur", "image/vnd.microsoft.icon", 713 "css", "text/css", 714 "csv", "text/csv", 715 "djvu", "image/x-djvu", 716 "elf", "application/x-elf", 717 "exe", "application/vnd.microsoft.portable-executable", 718 "flac", "audio/x-flac", 719 "gif", "image/gif", 720 "gz", "application/gzip", 721 "heic", "image/heic", 722 "htm", "text/html", 723 "html", "text/html", 724 "ico", "image/x-icon", 725 "iso", "application/octet-stream", 726 "jpg", "image/jpeg", 727 "jpeg", "image/jpeg", 728 "js", "application/javascript", 729 "json", "application/json", 730 "m4a", "audio/aac", 731 "m4v", "video/x-m4v", 732 "mid", "audio/midi", 733 "mov", "video/quicktime", 734 "mp4", "video/mp4", 735 "mp3", "audio/mpeg", 736 "mpg", "video/mpeg", 737 "ogg", "audio/ogg", 738 "opus", "audio/opus", 739 "pdf", "application/pdf", 740 "png", "image/png", 741 "ps", "application/postscript", 742 "psd", "image/vnd.adobe.photoshop", 743 "rtf", "application/rtf", 744 "sqlite3", "application/x-sqlite3", 745 "svg", "image/svg+xml", 746 "text", "text/plain", 747 "tiff", "image/tiff", 748 "tsv", "text/tsv", 749 "wasm", "application/wasm", 750 "wav", "audio/x-wav", 751 "webp", "image/webp", 752 "webm", "video/webm", 753 "xml", "application/xml", 754 "zip", "application/zip", 755 "zst", "application/zstd", 756 }; 757 758 const char* adapt_fallback(char* name) { 759 for (size_t i = 0; i < sizeof(fallback_aliases) / sizeof(char*); i += 2) { 760 if (strcmp(name, fallback_aliases[i]) == 0) { 761 return fallback_aliases[i + 1]; 762 } 763 } 764 return name; 765 } 766 767 // run returns the number of errors 768 size_t run(int argc, char** argv) { 769 // handle special cmd-line options 
770 for (size_t i = 1; i < argc; i++) { 771 if (is_help_option(argv[i])) { 772 // help option is handled right away, also quitting the app 773 puts(info); 774 return 0; 775 } 776 } 777 778 bufwriter w; 779 unsigned char buf[48 * 1024]; 780 init_bufwriter(&w, stdout, buf, sizeof(buf)); 781 782 size_t files = 0; 783 size_t errors = 0; 784 bool change_fallback = false; 785 786 // handle all filenames given 787 for (size_t i = 1; i < argc && !w.done; i++) { 788 if (change_fallback) { 789 fallback_mime_type = adapt_fallback(argv[i]); 790 change_fallback = false; 791 continue; 792 } 793 794 if (is_fallback_option(argv[i])) { 795 change_fallback = true; 796 continue; 797 } 798 799 if (!handle_file(&w, argv[i])) { 800 errors++; 801 } 802 files++; 803 } 804 805 if (change_fallback) { 806 fprintf(stderr, "\x1b[31mforgot new fallback MIME-type\x1b[0m\n"); 807 errors++; 808 } 809 810 // no filenames means use stdin as the only input 811 if (files == 0) { 812 if (!handle_reader(&w, stdin, stdin_name)) { 813 errors++; 814 } 815 } 816 817 return errors; 818 } 819 820 int main(int argc, char** argv) { 821 #ifdef _WIN32 822 setmode(fileno(stdin), O_BINARY); 823 // ensure output lines end in LF instead of CRLF on windows 824 setmode(fileno(stdout), O_BINARY); 825 setmode(fileno(stderr), O_BINARY); 826 #endif 827 828 // disable automatic stdio buffering, in favor of explicit buffering 829 setvbuf(stdin, NULL, _IONBF, 0); 830 setvbuf(stdout, NULL, _IONBF, 0); 831 setvbuf(stderr, NULL, _IONBF, 0); 832 833 // fill entries in the type-detect dispatch table 834 hdr_dispatch[0] = hdr_dispatch_0; // 0 835 hdr_dispatch[26] = hdr_dispatch_26; // 26 836 hdr_dispatch[31] = hdr_dispatch_31; // 31 837 hdr_dispatch[35] = hdr_dispatch_35; // 35 # 838 hdr_dispatch[37] = hdr_dispatch_37; // 37 % 839 hdr_dispatch[40] = hdr_dispatch_40; // 40 ( 840 hdr_dispatch[46] = hdr_dispatch_46; // 46 . 
841 hdr_dispatch[56] = hdr_dispatch_56; // 56 8 842 hdr_dispatch[60] = hdr_dispatch_60; // 60 < 843 hdr_dispatch[65] = hdr_dispatch_65; // 65 A 844 hdr_dispatch[66] = hdr_dispatch_66; // 66 B 845 hdr_dispatch[70] = hdr_dispatch_70; // 70 F 846 hdr_dispatch[71] = hdr_dispatch_71; // 71 G 847 hdr_dispatch[73] = hdr_dispatch_73; // 73 I 848 hdr_dispatch[77] = hdr_dispatch_77; // 77 M 849 hdr_dispatch[79] = hdr_dispatch_79; // 79 O 850 hdr_dispatch[80] = hdr_dispatch_80; // 80 P 851 hdr_dispatch[82] = hdr_dispatch_82; // 82 R 852 hdr_dispatch[83] = hdr_dispatch_83; // 83 S 853 hdr_dispatch[99] = hdr_dispatch_99; // 99 c 854 hdr_dispatch[102] = hdr_dispatch_102; // 102 f 855 hdr_dispatch[123] = hdr_dispatch_123; // 123 { 856 hdr_dispatch[127] = hdr_dispatch_127; // 127 857 hdr_dispatch[137] = hdr_dispatch_137; // 137 858 hdr_dispatch[255] = hdr_dispatch_255; // 255 859 860 return run(argc, argv) == 0 ? 0 : 1; 861 }