File: get.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 sudo apt install libcurl4-openssl-dev
  29 cc -Wall -s -O2 -o ./get ./get.c -l curl
  30 */
  31 
  32 #include <stdbool.h>
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 
  38 #ifdef _WIN32
  39 #include <fcntl.h>
  40 #include <windows.h>
  41 #endif
  42 
  43 #include <curl/curl.h>
  44 
  45 const char* info = ""
  46 "get [options...] [filenames/data-URIs/URIs...]\n"
  47 "\n"
  48 "\n"
  49 "Load bytes from all the named sources given, be them files, base64-encoded\n"
  50 "URIs, or HTTP/HTTPS URIs. The name `-` stands for the standard input. When\n"
  51 "no names are given, the standard input is used by default.\n"
  52 "\n"
  53 "The help option is `-h`, `--h`, `-help`, or `--help`."
  54 "";
  55 
  56 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  57 // CR-LF pairs into single LF bytes
  58 void handle_reader(FILE* w, FILE* r) {
  59     const int bufsize = 16 * 1024;
  60     unsigned char buf[bufsize];
  61 
  62     while (!feof(w)) {
  63         size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), r);
  64         if (len < 1) {
  65             break;
  66         }
  67         fwrite(&buf, len, 1, w);
  68     }
  69 
  70     fflush(w);
  71 }
  72 
  73 // handle_file handles data from the filename given; returns false only when
  74 // the file can't be opened
  75 bool handle_file(FILE* w, const char* fname) {
  76     FILE* f = fopen(fname, "rb");
  77     if (f == NULL) {
  78         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
  79         return false;
  80     }
  81 
  82     handle_reader(w, f);
  83     fclose(f);
  84     return true;
  85 }
  86 
  87 bool fetch(FILE* w, CURL* curl, const char* uri) {
  88     curl_easy_setopt(curl, CURLOPT_URL, uri);
  89 
  90     CURLcode code = curl_easy_perform(curl);
  91     if (code != CURLE_OK) {
  92         putc('\n', w);
  93         const char* msg = curl_easy_strerror(code);
  94         fprintf(stderr, "\x1b[31m%s: %s\x1b[0m\n", uri, msg);
  95         return false;
  96     }
  97 
  98     fflush(w);
  99     return true;
 100 }
 101 
// INVALID signals an input byte isn't allowed in a base64 stream; it can't
// be mistaken for a decoded value, since the lookup table's valid entries
// only range from 0x00 to 0x3f
#define INVALID 0xff
 104 
// base64_rev_lookup maps each possible byte value to the 6-bit value it
// stands for in the base64 alphabet: `A`-`Z` give 0-25, `a`-`z` give 26-51,
// `0`-`9` give 52-61, `+` gives 62, and `/` gives 63; every other byte,
// including the padding symbol `=`, maps to 0xff, the value of INVALID
const unsigned char base64_rev_lookup[256] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // bytes 0x28-0x2f, which include `+` (0x2b) and `/` (0x2f)
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // bytes 0x30-0x3f, which start with the digits `0`-`9`
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // bytes 0x40-0x5f, which include the uppercase letters `A`-`Z`
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // bytes 0x60-0x7f, which include the lowercase letters `a`-`z`
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // bytes 0x80-0xff are never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
 139 
 140 // unsigned char rev_lookup_byte(unsigned char b) {
 141 //     if ('A' <= b && b <= 'Z') {
 142 //         return b - 'A';
 143 //     }
 144 //     if ('a' <= b && b <= 'z') {
 145 //         return (b - 'a') + 26;
 146 //     }
 147 //     if ('0' <= b && b <= '9') {
 148 //         return (b - '0') + 52;
 149 //     }
 150 //
 151 //     switch (b) {
 152 //         case '+':
 153 //             return 62;
 154 //         case '/':
 155 //             return 63;
 156 //         default:
 157 //             return INVALID;
 158 //     }
 159 // }
 160 
 161 unsigned char rev_lookup_byte(unsigned char b) {
 162     return base64_rev_lookup[b];
 163 }
 164 
 165 void show_invalid_byte(unsigned char b, size_t line, size_t pos) {
 166     const char* msg = "invalid base64 data";
 167     const char* fmt = "\x1b[31m%s (byte %d, line: %ld, pos: %ld)\x1b[0m\n";
 168     fprintf(stderr, fmt, msg, b, (long)line, (long)pos);
 169 }
 170 
 171 bool match_lead(unsigned char* buf, size_t n, char* to) {
 172     for (; n > 0 && *to != 0; buf++, to++, n--) {
 173         if (*buf != *to) {
 174             return false;
 175         }
 176     }
 177     return true;
 178 }
 179 
 180 size_t skip_data_uri(unsigned char* buf, size_t n) {
 181     for (size_t i = 0; i < n; i++) {
 182         if (match_lead(buf + i, n - i, ";base64,")) {
 183             return i + (sizeof(";base64,") - 1);
 184         }
 185     }
 186     return 0;
 187 }
 188 
 189 bool handle_data_uri(FILE* w, char* uri) {
 190     uint64_t line = 1;
 191     uint64_t pos = 1;
 192     uint64_t payload = 0;
 193     uint64_t padding = 0;
 194 
 195     unsigned char quad[4];
 196     quad[0] = 0;
 197     quad[1] = 0;
 198     quad[2] = 0;
 199     quad[3] = 0;
 200 
 201     size_t n = strlen(uri);
 202     unsigned char* chunk = (unsigned char*)uri;
 203 
 204     // skip leading utf-8 byte-order-mark bytes, if present
 205     if (n >= 3 && match_lead(chunk, n, "\xef\xbb\xbf")) {
 206         chunk += 3;
 207         n -= 3;
 208     }
 209 
 210     // skip leading data-URI prelude, if present
 211     if (match_lead(chunk, n, "data:")) {
 212         const int skip = skip_data_uri(chunk, n);
 213         chunk += skip;
 214         n -= skip;
 215     }
 216 
 217     for (size_t i = 0; i < n; i++) {
 218         const unsigned char v = chunk[i];
 219 
 220         // ignore carriage-returns to support CRLF lines
 221         if (v == '\r') {
 222             continue;
 223         }
 224 
 225         // base64 streams can span multiple lines
 226         if (v == '\n') {
 227             line++;
 228             pos = 1;
 229             continue;
 230         }
 231 
 232         pos++;
 233 
 234         if (v == '=') {
 235             padding++;
 236             continue;
 237         }
 238 
 239         if (padding > 0 && v != '=') {
 240             putc('\n', w);
 241             fflush(w);
 242 
 243             const char* msg = "equal signs are only valid at the end";
 244             const char* fmt = "\x1b[31m%s (line %ld, pos %ld)\x1b[0m\n";
 245             fprintf(stderr, fmt, msg, (long)line, (long)pos);
 246             return false;
 247         }
 248 
 249         unsigned char b = rev_lookup_byte(v);
 250 
 251         if (b == INVALID) {
 252             show_invalid_byte(v, line, pos);
 253             return false;
 254         }
 255 
 256         const size_t step = payload % 4;
 257         quad[step] = b;
 258         payload++;
 259 
 260         if (step == 3) {
 261             // 01234567 01234567 01234567 01234567
 262             // 00000000 11111111 22222222 33333333
 263             // xx000000 xx001111 xx111122 xx222222
 264             putc((quad[0] << 2) | (quad[1] >> 4), w);
 265             putc((quad[1] << 4) | (quad[2] >> 2), w);
 266             putc((quad[2] << 6) | (quad[3] >> 0), w);
 267         }
 268     }
 269 
 270     // try to be resilient to missing trailing/padding equals
 271     // if (padding == 0 && payload > 0) {
 272     //     padding = 4 - (payload % 4);
 273     // }
 274 
 275     // don't forget unemitted trailing bytes, if any
 276     switch (padding) {
 277         case 1:
 278             putc((quad[0] << 2) | (quad[1] >> 4), w);
 279             putc((quad[1] << 4) | (quad[2] >> 2), w);
 280             break;
 281         case 2:
 282             putc((quad[0] << 2) | (quad[1] >> 4), w);
 283             break;
 284     }
 285 
 286     fflush(w);
 287     return true;
 288 }
 289 
 290 bool starts_with(const char* s, const char* prefix) {
 291     for (size_t i = 0; prefix[i] != 0; i++) {
 292         if (s[i] == 0) {
 293             return prefix[i] == 0;
 294         }
 295         if (s[i] != prefix[i]) {
 296             return false;
 297         }
 298     }
 299 
 300     return true;
 301 }
 302 
 303 bool seems_curlable(const char* s) {
 304     return false ||
 305         starts_with(s, "https://") ||
 306         starts_with(s, "http://") ||
 307         starts_with(s, "ftp://") ||
 308         starts_with(s, "ftps://") ||
 309         starts_with(s, "gopher://") ||
 310         starts_with(s, "gophers://") ||
 311         starts_with(s, "rtmp://") ||
 312         starts_with(s, "rtsp://") ||
 313         starts_with(s, "scp://") ||
 314         starts_with(s, "sftp://") ||
 315         starts_with(s, "smb://") ||
 316         starts_with(s, "smbs://") ||
 317         starts_with(s, "telnet://") ||
 318         starts_with(s, "tftp://") ||
 319         false;
 320 }
 321 
 322 // run returns the number of errors
 323 int run(int argc, char** argv, FILE* w) {
 324     size_t dashes = 0;
 325     CURL* curl = NULL;
 326 
 327     for (int i = 1; i < argc; i++) {
 328         if (argv[i][0] == '-' && argv[i][1] == 0) {
 329             if (dashes > 1) {
 330                 break;
 331             }
 332             dashes++;
 333         }
 334 
 335         if (curl != NULL || seems_curlable(argv[i])) {
 336             curl = curl_easy_init();
 337             if (curl == NULL) {
 338                 fprintf(stderr, "\x1b[31mcan't initialize libcurl\x1b[0m\n");
 339                 return 1;
 340             }
 341 
 342             curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
 343             curl_easy_setopt(curl, CURLOPT_WRITEDATA, w);
 344         }
 345     }
 346 
 347     if (dashes > 1) {
 348         const char* msg = "can't use the standard input (dash) more than once";
 349         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
 350         return 1;
 351     }
 352 
 353     // use stdin when not given any filepaths
 354     if (argc <= 1) {
 355         handle_reader(w, stdin);
 356         return 0;
 357     }
 358 
 359     for (int i = 1; i < argc && !feof(stdout); i++) {
 360         if (argv[i][0] == '-' && argv[i][1] == 0) {
 361             handle_reader(w, stdin);
 362             continue;
 363         }
 364 
 365         if (starts_with(argv[i], "data:")) {
 366             if (!handle_data_uri(w, argv[i])) {
 367                 return 1;
 368             }
 369             continue;
 370         }
 371 
 372         if (starts_with(argv[i], "file://")) {
 373             if (!handle_file(w, argv[i] + sizeof("file://") - 1)) {
 374                 return 1;
 375             }
 376             continue;
 377         }
 378 
 379         if (seems_curlable(argv[i])) {
 380             if (!fetch(w, curl, argv[i])) {
 381                 return 1;
 382             }
 383             continue;
 384         }
 385 
 386         if (!handle_file(w, argv[i])) {
 387             return 1;
 388         }
 389     }
 390 
 391     if (curl != NULL) {
 392         curl_easy_cleanup(curl);
 393     }
 394     return 0;
 395 }
 396 
 397 int main(int argc, char** argv) {
 398 #ifdef _WIN32
 399     setmode(fileno(stdin), O_BINARY);
 400     // ensure output lines end in LF instead of CRLF on windows
 401     setmode(fileno(stdout), O_BINARY);
 402     setmode(fileno(stderr), O_BINARY);
 403 #endif
 404 
 405     if (argc > 1) {
 406         if (
 407             strcmp(argv[1], "-h") == 0 ||
 408             strcmp(argv[1], "-help") == 0 ||
 409             strcmp(argv[1], "--h") == 0 ||
 410             strcmp(argv[1], "--help") == 0
 411         ) {
 412             fprintf(stdout, "%s", info);
 413             return 0;
 414         }
 415     }
 416 
 417     setvbuf(stdout, NULL, _IOFBF, 0);
 418     return run(argc, argv, stdout) == 0 ? 0 : 1;
 419 }