File: get.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 sudo apt install libcurl4-openssl-dev
  29 cc -Wall -s -O3 -march=native -mtune=native -flto -o ./get ./get.c -l curl
  30 */
  31 
  32 #include <stdbool.h>
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <sys/stat.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 #include <curl/curl.h>
  45 
  46 #ifdef RED_ERRORS
  47 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  48 #ifdef __APPLE__
  49 #define ERROR_STYLE "\x1b[31m"
  50 #endif
  51 #define RESET_STYLE "\x1b[0m"
  52 #else
  53 #define ERROR_STYLE
  54 #define RESET_STYLE
  55 #endif
  56 
  57 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  58 
  59 const char* info = ""
  60 "get [options...] [filenames/data-URIs/URIs...]\n"
  61 "\n"
  62 "\n"
  63 "Load bytes from all the named sources given, be them files, base64-encoded\n"
  64 "URIs, or HTTP/HTTPS URIs. The name `-` stands for the standard input. When\n"
  65 "no names are given, the standard input is used by default.\n"
  66 "\n"
  67 "The help option is `-h`, `--h`, `-help`, or `--help`.\n"
  68 "";
  69 
  70 void handle_reader(FILE* w, FILE* r) {
  71     const size_t bufsize = 32 * 1024;
  72     unsigned char buf[bufsize];
  73 
  74     while (!feof(w)) {
  75         size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), r);
  76         if (len < 1) {
  77             break;
  78         }
  79         fwrite(&buf, 1, len, w);
  80     }
  81 
  82     fflush(w);
  83 }
  84 
  85 // handle_file handles data from the filename given; returns false only when
  86 // the file can't be opened
  87 bool handle_file(FILE* w, const char* path) {
  88     FILE* f = fopen(path, "rb");
  89     if (f == NULL) {
  90         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
  91         return false;
  92     }
  93 
  94     handle_reader(w, f);
  95     fclose(f);
  96     return true;
  97 }
  98 
  99 bool fetch(FILE* w, CURL* curl, const char* uri) {
 100     curl_easy_setopt(curl, CURLOPT_URL, uri);
 101 
 102     CURLcode code = curl_easy_perform(curl);
 103     if (code != CURLE_OK) {
 104         fputc('\n', w);
 105         const char* msg = curl_easy_strerror(code);
 106         fprintf(stderr, ERROR_LINE("%s: %s"), uri, msg);
 107         return false;
 108     }
 109 
 110     fflush(w);
 111     return true;
 112 }
 113 
// INVALID signals an input byte isn't allowed in a base64 stream
#define INVALID 0xff

// base64_rev_lookup maps each possible input byte to the 6-bit value it
// stands for in the standard base64 alphabet, or to INVALID (0xff) when the
// byte isn't part of that alphabet: each row below covers 8 consecutive byte
// values, starting from 0. Note the padding symbol `=` also maps to INVALID,
// so padding must be detected before consulting this table.
const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // `+` (0x2b) stands for 62, `/` (0x2f) stands for 63
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // digits `0` to `9` stand for 52 to 61
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // uppercase letters `A` to `Z` stand for 0 to 25
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // lowercase letters `a` to `z` stand for 26 to 51
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // no byte with its top bit set (0x80 and above) is valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
 151 
 152 // unsigned char rev_lookup_byte(unsigned char b) {
 153 //     if ('A' <= b && b <= 'Z') {
 154 //         return b - 'A';
 155 //     }
 156 //     if ('a' <= b && b <= 'z') {
 157 //         return (b - 'a') + 26;
 158 //     }
 159 //     if ('0' <= b && b <= '9') {
 160 //         return (b - '0') + 52;
 161 //     }
 162 //
 163 //     switch (b) {
 164 //     case '+':
 165 //         return 62;
 166 //     case '/':
 167 //         return 63;
 168 //     default:
 169 //         return INVALID;
 170 //     }
 171 // }
 172 
 173 static inline unsigned char rev_lookup_byte(unsigned char b) {
 174     return base64_rev_lookup[b];
 175 }
 176 
 177 void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
 178     const char* msg = "invalid base64 data";
 179     const char* fmt = ERROR_LINE("%s (byte %d, line: %ld, pos: %ld)");
 180     fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
 181 }
 182 
 183 bool match_lead(unsigned char* buf, size_t n, char* to) {
 184     for (; n > 0 && *to != 0; buf++, to++, n--) {
 185         if (*buf != *to) {
 186             return false;
 187         }
 188     }
 189     return true;
 190 }
 191 
 192 size_t skip_data_uri(unsigned char* buf, size_t n) {
 193     for (size_t i = 0; i < n; i++) {
 194         if (match_lead(buf + i, n - i, ";base64,")) {
 195             return i + (sizeof(";base64,") - 1);
 196         }
 197     }
 198     return 0;
 199 }
 200 
 201 bool handle_data_uri(FILE* w, char* uri) {
 202     uint64_t line = 1;
 203     uint64_t pos = 1;
 204     uint64_t payload = 0;
 205     uint64_t padding = 0;
 206 
 207     unsigned char quad[4];
 208     quad[0] = 0;
 209     quad[1] = 0;
 210     quad[2] = 0;
 211     quad[3] = 0;
 212 
 213     size_t n = strlen(uri);
 214     unsigned char* chunk = (unsigned char*)uri;
 215 
 216     // skip leading utf-8 byte-order-mark bytes, if present
 217     if (n >= 3 && match_lead(chunk, n, "\xef\xbb\xbf")) {
 218         chunk += 3;
 219         n -= 3;
 220     }
 221 
 222     // skip leading data-URI prelude, if present
 223     if (match_lead(chunk, n, "data:")) {
 224         const int skip = skip_data_uri(chunk, n);
 225         chunk += skip;
 226         n -= skip;
 227     }
 228 
 229     for (size_t i = 0; i < n; i++) {
 230         const unsigned char v = chunk[i];
 231 
 232         // ignore carriage-returns to support CRLF lines
 233         if (v == '\r') {
 234             continue;
 235         }
 236 
 237         // base64 streams can span multiple lines
 238         if (v == '\n') {
 239             line++;
 240             pos = 1;
 241             continue;
 242         }
 243 
 244         pos++;
 245 
 246         if (v == '=') {
 247             padding++;
 248             continue;
 249         }
 250 
 251         if (padding > 0 && v != '=') {
 252             fputc('\n', w);
 253             fflush(w);
 254 
 255             const char* msg = "equal signs are only valid at the end";
 256             const char* fmt = ERROR_LINE("%s (line %ld, pos %ld)");
 257             fprintf(stderr, fmt, msg, (long)line, (long)pos);
 258             return false;
 259         }
 260 
 261         unsigned char b = rev_lookup_byte(v);
 262 
 263         if (b == INVALID) {
 264             show_invalid_byte(v, line, pos);
 265             return false;
 266         }
 267 
 268         const size_t step = payload % 4;
 269         quad[step] = b;
 270         payload++;
 271 
 272         if (step == 3) {
 273             // 01234567 01234567 01234567 01234567
 274             // 00000000 11111111 22222222 33333333
 275             // xx000000 xx001111 xx111122 xx222222
 276             fputc((quad[0] << 2) | (quad[1] >> 4), w);
 277             fputc((quad[1] << 4) | (quad[2] >> 2), w);
 278             fputc((quad[2] << 6) | (quad[3] >> 0), w);
 279         }
 280     }
 281 
 282     // try to be resilient to missing trailing/padding equals
 283     // if (padding == 0 && payload > 0) {
 284     //     padding = 4 - (payload % 4);
 285     // }
 286 
 287     // don't forget unemitted trailing bytes, if any
 288     switch (padding) {
 289     case 1:
 290         fputc((quad[0] << 2) | (quad[1] >> 4), w);
 291         fputc((quad[1] << 4) | (quad[2] >> 2), w);
 292         break;
 293     case 2:
 294         fputc((quad[0] << 2) | (quad[1] >> 4), w);
 295         break;
 296     }
 297 
 298     fflush(w);
 299     return true;
 300 }
 301 
 302 bool starts_with(const char* s, const char* prefix) {
 303     for (size_t i = 0; prefix[i] != 0; i++) {
 304         if (s[i] == 0) {
 305             return prefix[i] == 0;
 306         }
 307         if (s[i] != prefix[i]) {
 308             return false;
 309         }
 310     }
 311 
 312     return true;
 313 }
 314 
 315 bool is_file(const char* path) {
 316     struct stat meta;
 317     return stat(path, &meta) == 0;
 318 }
 319 
 320 bool seems_curlable(const char* s) {
 321     return false ||
 322         starts_with(s, "https://") ||
 323         starts_with(s, "http://") ||
 324         starts_with(s, "ftp://") ||
 325         starts_with(s, "ftps://") ||
 326         starts_with(s, "gopher://") ||
 327         starts_with(s, "gophers://") ||
 328         starts_with(s, "rtmp://") ||
 329         starts_with(s, "rtsp://") ||
 330         starts_with(s, "scp://") ||
 331         starts_with(s, "sftp://") ||
 332         starts_with(s, "smb://") ||
 333         starts_with(s, "smbs://") ||
 334         starts_with(s, "telnet://") ||
 335         starts_with(s, "tftp://") ||
 336         false;
 337 }
 338 
 339 // run returns the number of errors
 340 int run(int argc, char** argv, FILE* w) {
 341     size_t dashes = 0;
 342     CURL* curl = NULL;
 343 
 344     for (int i = 1; i < argc; i++) {
 345         if (argv[i][0] == '-' && argv[i][1] == 0) {
 346             dashes++;
 347             continue;
 348         }
 349 
 350         if (curl != NULL || seems_curlable(argv[i])) {
 351             curl = curl_easy_init();
 352             if (curl == NULL) {
 353                 fprintf(stderr, ERROR_LINE("can't initialize libcurl"));
 354                 return 1;
 355             }
 356 
 357             curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
 358             curl_easy_setopt(curl, CURLOPT_WRITEDATA, w);
 359         }
 360     }
 361 
 362     if (dashes > 1) {
 363         const char* msg = "can't use the standard input (dash) more than once";
 364         fprintf(stderr, ERROR_LINE("%s"), msg);
 365         return 1;
 366     }
 367 
 368     // use stdin when not given any filepaths
 369     if (argc <= 1) {
 370         handle_reader(w, stdin);
 371         return 0;
 372     }
 373 
 374     for (int i = 1; i < argc && !feof(w); i++) {
 375         if (is_file(argv[i])) {
 376             if (!handle_file(w, argv[i])) {
 377                return 1;
 378             }
 379             continue;
 380         }
 381 
 382         if (argv[i][0] == '-' && argv[i][1] == 0) {
 383             handle_reader(w, stdin);
 384             continue;
 385         }
 386 
 387         if (starts_with(argv[i], "data:")) {
 388             if (!handle_data_uri(w, argv[i])) {
 389                 return 1;
 390             }
 391             continue;
 392         }
 393 
 394         if (starts_with(argv[i], "file://")) {
 395             if (!handle_file(w, argv[i] + sizeof("file://") - 1)) {
 396                 return 1;
 397             }
 398             continue;
 399         }
 400 
 401         if (seems_curlable(argv[i])) {
 402             if (!fetch(w, curl, argv[i])) {
 403                 return 1;
 404             }
 405             continue;
 406         }
 407 
 408         if (!handle_file(w, argv[i])) {
 409             return 1;
 410         }
 411     }
 412 
 413     if (curl != NULL) {
 414         curl_easy_cleanup(curl);
 415     }
 416     return 0;
 417 }
 418 
 419 int main(int argc, char** argv) {
 420 #ifdef _WIN32
 421     setmode(fileno(stdin), O_BINARY);
 422     // ensure output lines end in LF instead of CRLF on windows
 423     setmode(fileno(stdout), O_BINARY);
 424     setmode(fileno(stderr), O_BINARY);
 425 #endif
 426 
 427     if (argc > 1) {
 428         if (
 429             strcmp(argv[1], "-h") == 0 ||
 430             strcmp(argv[1], "-help") == 0 ||
 431             strcmp(argv[1], "--h") == 0 ||
 432             strcmp(argv[1], "--help") == 0
 433         ) {
 434             fprintf(stdout, "%s", info);
 435             return 0;
 436         }
 437     }
 438 
 439     // setvbuf(stdin, NULL, _IOFBF, 0);
 440     setvbuf(stdout, NULL, _IOFBF, 0);
 441     return run(argc, argv, stdout) == 0 ? 0 : 1;
 442 }