File: get.c 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 You can build this command-line app by running 27 28 sudo apt install libcurl4-openssl-dev 29 cc -Wall -s -O2 -o ./get ./get.c -l curl 30 */ 31 32 #include <stdbool.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 38 #ifdef _WIN32 39 #include <fcntl.h> 40 #include <windows.h> 41 #endif 42 43 #include <curl/curl.h> 44 45 const char* info = "" 46 "get [options...] [filenames/data-URIs/URIs...]\n" 47 "\n" 48 "\n" 49 "Load bytes from all the named sources given, be them files, base64-encoded\n" 50 "URIs, or HTTP/HTTPS URIs. The name `-` stands for the standard input. When\n" 51 "no names are given, the standard input is used by default.\n" 52 "\n" 53 "The help option is `-h`, `--h`, `-help`, or `--help`." 54 ""; 55 56 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all 57 // CR-LF pairs into single LF bytes 58 void handle_reader(FILE* w, FILE* r) { 59 const int bufsize = 16 * 1024; 60 unsigned char buf[bufsize]; 61 62 while (!feof(w)) { 63 size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), r); 64 if (len < 1) { 65 break; 66 } 67 fwrite(&buf, len, 1, w); 68 } 69 70 fflush(w); 71 } 72 73 // handle_file handles data from the filename given; returns false only when 74 // the file can't be opened 75 bool handle_file(FILE* w, const char* fname) { 76 FILE* f = fopen(fname, "rb"); 77 if (f == NULL) { 78 fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname); 79 return false; 80 } 81 82 handle_reader(w, f); 83 fclose(f); 84 return true; 85 } 86 87 bool fetch(FILE* w, CURL* curl, const char* uri) { 88 curl_easy_setopt(curl, CURLOPT_URL, uri); 89 90 CURLcode code = curl_easy_perform(curl); 91 if (code != CURLE_OK) { 92 putc('\n', w); 93 const char* msg = curl_easy_strerror(code); 94 fprintf(stderr, "\x1b[31m%s: %s\x1b[0m\n", uri, msg); 95 return false; 96 } 97 98 fflush(w); 99 return true; 100 } 101 102 // INVALID signals an input byte isn't allowed in a base64 stream 103 #define INVALID 0xff 104 105 const unsigned char base64_rev_lookup[256] = { 106 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 107 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 108 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 109 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 110 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 111 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f, 112 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 113 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 114 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 115 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 116 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 117 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff, 118 0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 119 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 120 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 121 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff, 122 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 123 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 124 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 125 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 126 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 127 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 128 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 129 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 130 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 131 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 132 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 133 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 134 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 135 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 136 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 137 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 138 }; 139 140 // unsigned char rev_lookup_byte(unsigned char b) { 141 // if ('A' <= b && b <= 'Z') { 142 // return b - 'A'; 143 // } 144 // if ('a' <= b && b <= 'z') { 145 // return (b - 'a') + 26; 146 // } 147 // if ('0' <= b && b <= '9') { 148 // return (b - '0') + 52; 149 // } 150 // 151 // switch (b) { 152 // case '+': 153 // return 62; 154 // case '/': 155 // return 63; 156 // default: 157 // return INVALID; 158 // } 159 // } 160 161 unsigned char rev_lookup_byte(unsigned char b) { 162 return base64_rev_lookup[b]; 163 } 164 165 void show_invalid_byte(unsigned char b, size_t line, size_t pos) { 166 const char* msg = "invalid base64 data"; 167 const char* fmt = "\x1b[31m%s (byte %d, line: %ld, pos: %ld)\x1b[0m\n"; 168 fprintf(stderr, fmt, msg, b, (long)line, (long)pos); 169 } 170 171 bool match_lead(unsigned char* buf, size_t n, char* to) { 172 for (; n > 0 && *to != 0; buf++, to++, n--) { 173 if (*buf != *to) { 174 return false; 175 } 176 } 177 return true; 178 } 179 180 size_t skip_data_uri(unsigned char* buf, size_t n) { 181 for (size_t i = 0; i < n; i++) { 182 if (match_lead(buf + i, n - i, ";base64,")) { 183 return i + (sizeof(";base64,") - 1); 184 } 185 } 186 return 0; 187 } 188 189 bool handle_data_uri(FILE* w, char* uri) { 190 uint64_t line = 1; 191 uint64_t pos = 1; 192 uint64_t payload = 0; 193 uint64_t padding = 0; 194 195 unsigned char quad[4]; 196 quad[0] = 0; 197 quad[1] = 0; 198 quad[2] = 0; 199 quad[3] = 0; 200 201 size_t n = strlen(uri); 202 unsigned char* chunk = (unsigned char*)uri; 203 204 // skip leading utf-8 byte-order-mark bytes, if present 205 if (n >= 3 && match_lead(chunk, n, "\xef\xbb\xbf")) { 206 chunk += 3; 207 n -= 3; 208 } 209 210 // skip leading data-URI prelude, if present 211 if (match_lead(chunk, n, "data:")) { 212 const int skip = skip_data_uri(chunk, n); 213 chunk += skip; 214 n -= skip; 215 } 216 217 for (size_t i = 0; i < n; i++) { 218 const unsigned char v = chunk[i]; 219 220 // ignore carriage-returns to support CRLF lines 221 if (v == '\r') { 222 continue; 223 } 224 225 // base64 streams can span multiple lines 226 if (v == '\n') { 227 line++; 228 pos = 1; 229 continue; 230 } 231 232 pos++; 233 234 if (v == '=') { 235 padding++; 236 continue; 237 } 238 239 if (padding > 0 && v != '=') { 240 putc('\n', w); 241 fflush(w); 242 243 const char* msg = "equal signs are only valid at the end"; 244 const char* fmt = "\x1b[31m%s (line %ld, pos %ld)\x1b[0m\n"; 245 fprintf(stderr, fmt, msg, (long)line, (long)pos); 246 return false; 247 } 248 249 unsigned char b = rev_lookup_byte(v); 250 251 if (b == INVALID) { 252 show_invalid_byte(v, line, pos); 253 return false; 254 } 255 256 const size_t step = payload % 4; 257 quad[step] = b; 258 payload++; 259 260 if (step == 3) { 261 // 01234567 01234567 01234567 01234567 262 // 00000000 11111111 22222222 33333333 263 // xx000000 xx001111 xx111122 xx222222 264 putc((quad[0] << 2) | (quad[1] >> 4), w); 265 putc((quad[1] << 4) | (quad[2] >> 2), w); 266 putc((quad[2] << 6) | (quad[3] >> 0), w); 267 } 268 } 269 270 // try to be resilient to missing trailing/padding equals 271 // if (padding == 0 && payload > 0) { 272 // padding = 4 - (payload % 4); 273 // } 274 275 // don't forget unemitted trailing bytes, if any 276 switch (padding) { 277 case 1: 278 putc((quad[0] << 2) | (quad[1] >> 4), w); 279 putc((quad[1] << 4) | (quad[2] >> 2), w); 280 break; 281 case 2: 282 putc((quad[0] << 2) | (quad[1] >> 4), w); 283 break; 284 } 285 286 fflush(w); 287 return true; 288 } 289 290 bool starts_with(const char* s, const char* prefix) { 291 for (size_t i = 0; prefix[i] != 0; i++) { 292 if (s[i] == 0) { 293 return prefix[i] == 0; 294 } 295 if (s[i] != prefix[i]) { 296 return false; 297 } 298 } 299 300 return true; 301 } 302 303 bool seems_curlable(const char* s) { 304 return false || 305 starts_with(s, "https://") || 306 starts_with(s, "http://") || 307 starts_with(s, "ftp://") || 308 starts_with(s, "ftps://") || 309 starts_with(s, "gopher://") || 310 starts_with(s, "gophers://") || 311 starts_with(s, "rtmp://") || 312 starts_with(s, "rtsp://") || 313 starts_with(s, "scp://") || 314 starts_with(s, "sftp://") || 315 starts_with(s, "smb://") || 316 starts_with(s, "smbs://") || 317 starts_with(s, "telnet://") || 318 starts_with(s, "tftp://") || 319 false; 320 } 321 322 // run returns the number of errors 323 int run(int argc, char** argv, FILE* w) { 324 size_t dashes = 0; 325 CURL* curl = NULL; 326 327 for (int i = 1; i < argc; i++) { 328 if (argv[i][0] == '-' && argv[i][1] == 0) { 329 if (dashes > 1) { 330 break; 331 } 332 dashes++; 333 } 334 335 if (curl != NULL || seems_curlable(argv[i])) { 336 curl = curl_easy_init(); 337 if (curl == NULL) { 338 fprintf(stderr, "\x1b[31mcan't initialize libcurl\x1b[0m\n"); 339 return 1; 340 } 341 342 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); 343 curl_easy_setopt(curl, CURLOPT_WRITEDATA, w); 344 } 345 } 346 347 if (dashes > 1) { 348 const char* msg = "can't use the standard input (dash) more than once"; 349 fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg); 350 return 1; 351 } 352 353 // use stdin when not given any filepaths 354 if (argc <= 1) { 355 handle_reader(w, stdin); 356 return 0; 357 } 358 359 for (int i = 1; i < argc && !feof(stdout); i++) { 360 if (argv[i][0] == '-' && argv[i][1] == 0) { 361 handle_reader(w, stdin); 362 continue; 363 } 364 365 if (starts_with(argv[i], "data:")) { 366 if (!handle_data_uri(w, argv[i])) { 367 return 1; 368 } 369 continue; 370 } 371 372 if (starts_with(argv[i], "file://")) { 373 if (!handle_file(w, argv[i] + sizeof("file://") - 1)) { 374 return 1; 375 } 376 continue; 377 } 378 379 if (seems_curlable(argv[i])) { 380 if (!fetch(w, curl, argv[i])) { 381 return 1; 382 } 383 continue; 384 } 385 386 if (!handle_file(w, argv[i])) { 387 return 1; 388 } 389 } 390 391 if (curl != NULL) { 392 curl_easy_cleanup(curl); 393 } 394 return 0; 395 } 396 397 int main(int argc, char** argv) { 398 #ifdef _WIN32 399 setmode(fileno(stdin), O_BINARY); 400 // ensure output lines end in LF instead of CRLF on windows 401 setmode(fileno(stdout), O_BINARY); 402 setmode(fileno(stderr), O_BINARY); 403 #endif 404 405 if (argc > 1) { 406 if ( 407 strcmp(argv[1], "-h") == 0 || 408 strcmp(argv[1], "-help") == 0 || 409 strcmp(argv[1], "--h") == 0 || 410 strcmp(argv[1], "--help") == 0 411 ) { 412 fprintf(stdout, "%s", info); 413 return 0; 414 } 415 } 416 417 setvbuf(stdout, NULL, _IOFBF, 0); 418 return run(argc, argv, stdout) == 0 ? 0 : 1; 419 }