File: filesizes.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./filesizes ./filesizes.c
  29 */
  30 
  31 #include <dirent.h>
  32 #include <stdbool.h>
  33 #include <stdint.h>
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <string.h>
  37 #include <sys/stat.h>
  38 
  39 #ifdef _WIN32
  40 #include <fcntl.h>
  41 #include <windows.h>
  42 #endif
  43 
  44 // info is the multi-line help message
  45 const char* info = ""
  46     "filesizes [options...] [filenames...]\n"
  47     "\n"
  48     "Show the byte-counts for all the files given: output is lines, each with 2\n"
  49     "tab-separated items, the name and the byte-count. First line has the column\n"
  50     "names.\n"
  51     "\n"
  52     "\n"
  53     "Options\n"
  54     "\n"
  55     "    -h, --h            show this help message\n"
  56     "    -help, --help      aliases for option -h\n"
  57     "";
  58 
  59 // handle_stdin counts the standard-input's bytes
  60 void handle_stdin() {
  61     unsigned char buf[32 * 1024];
  62     uint64_t bytes = 0;
  63 
  64     putc('-', stdout);
  65 
  66     while (!feof(stdin)) {
  67         size_t n = fread(&buf, sizeof(buf[0]), sizeof(buf), stdin);
  68         if (n < 1) {
  69             // assume input is over when no bytes were read
  70             break;
  71         }
  72         bytes += n;
  73     }
  74 
  75     printf("\t%lu\n", (long unsigned int)bytes);
  76 }
  77 
  78 bool handle_file(const char* path);
  79 
  80 // fail_fullpath_alloc gives up by quitting, and is used in func handle_folder
  81 void fail_fullpath_alloc(DIR* entries) {
  82     const char* msg = "can't get memory for the full file names";
  83     closedir(entries);
  84     putc('\n', stdout);
  85     fprintf(stderr, "\x1b[31m%s\x1b[0m\n", msg);
  86     exit(1);
  87 }
  88 
  89 // handle_folder handles folder entries for func handle_files: these 2 funcs
  90 // may involve mutual recursion, when nested files/folders are involved
  91 bool handle_folder(const char* path) {
  92     DIR* entries = opendir(path);
  93 
  94     if (entries == NULL) {
  95         return false;
  96     }
  97 
  98     size_t path_len = strlen(path);
  99     // find the slash's position, whether included in the folder path or not
 100     bool trailing_slash = path[path_len - 1] == '/';
 101     size_t slash = trailing_slash ? path_len - 1 : path_len;
 102     // ensure starting capacity can fit a slash either way
 103     size_t cap = slash + 2;
 104 
 105     // fullpath is a reusable string-area to keep appending the final parts
 106     // of full pathnames for all the folder's entries
 107     char* fullpath = malloc(cap);
 108 
 109     // if allocation fails, simply give and quit the app with a message
 110     if (fullpath == NULL) {
 111         fail_fullpath_alloc(entries);
 112         return false;
 113     }
 114 
 115     // start full-path string with the folder's path
 116     strcpy(fullpath, path);
 117 
 118     // ensure a slash is between the folder's path and its entries' names
 119     if (fullpath[slash] != '/') {
 120         fullpath[slash + 0] = '/';
 121         fullpath[slash + 1] = 0;
 122     }
 123 
 124     // remember where to start appending entry names in the full-path string
 125     size_t start = slash + 1;
 126 
 127     while (!feof(stdout)) {
 128         const struct dirent* item = readdir(entries);
 129         if (item == NULL) {
 130             break;
 131         }
 132 
 133         const char* name = item->d_name;
 134 
 135         // ignore entries `.` and `..`
 136         if (name[0] == '.') {
 137             if ((name[1] == 0) || (name[1] == '.' && name[2] == 0)) {
 138                 continue;
 139             }
 140         }
 141 
 142         // ensure capacity of the full-path is enough for this entry's name
 143         size_t extra = strlen(name);
 144         if (start + extra >= cap) {
 145             char* old = fullpath;
 146             cap = start + extra + 2;
 147             fullpath = realloc(fullpath, cap);
 148 
 149             // if allocation fails, simply give and quit the app with a message
 150             if (fullpath == NULL) {
 151                 free(old);
 152                 fail_fullpath_alloc(entries);
 153                 return false;
 154             }
 155         }
 156 
 157         // complete full-path using the name of the current entry
 158         strcpy(fullpath + slash + 1, name);
 159 
 160         // handle entry, possibly recursively in case of another folder
 161         if (!handle_file(fullpath)) {
 162             free(fullpath);
 163             return false;
 164         }
 165     }
 166 
 167     closedir(entries);
 168     free(fullpath);
 169     return true;
 170 }
 171 
 172 // handle_file handles data from the filename given; returns false only when
 173 // the file can't be queried for its size, likely because it doesn't exist
 174 bool handle_file(const char* path) {
 175     struct stat st;
 176     if (stat(path, &st) != 0) {
 177         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 178         return false;
 179     }
 180 
 181     if (!S_ISDIR(st.st_mode)) {
 182         printf("%s\t%ld\n", path, st.st_size);
 183         return true;
 184     }
 185 
 186     return handle_folder(path);
 187 }
 188 
 189 // is_help_option simplifies control-flow for func run
 190 bool is_help_option(const char* s) {
 191     return (s[0] == '-') && (s[1] != 0) && (
 192         strcmp(s, "-h") == 0 ||
 193         strcmp(s, "-help") == 0 ||
 194         strcmp(s, "--h") == 0 ||
 195         strcmp(s, "--help") == 0
 196     );
 197 }
 198 
 199 // run returns the number of errors
 200 int run(int argc, char** argv) {
 201     size_t errors = 0;
 202 
 203     // handle special cmd-line options
 204     for (size_t i = 1; i < argc; i++) {
 205         if (is_help_option(argv[i])) {
 206             // help option is handled right away, also quitting the app
 207             fprintf(stderr, "%s", info);
 208             return 0;
 209         }
 210     }
 211 
 212     puts("file\tbytes");
 213 
 214     for (size_t i = 1; i < argc && !feof(stdout); i++) {
 215         // a `-` filename stands for the standard input
 216         if (argv[i][0] == '-' && argv[i][1] == 0) {
 217             handle_stdin();
 218             continue;
 219         }
 220 
 221         if (!handle_file(argv[i])) {
 222             errors++;
 223         }
 224     }
 225 
 226     // no filenames means use stdin as the only input
 227     if (argc < 2) {
 228         handle_stdin();
 229     }
 230 
 231     return errors;
 232 }
 233 
 234 int main(int argc, char** argv) {
 235 #ifdef _WIN32
 236     setmode(fileno(stdin), O_BINARY);
 237     // ensure output lines end in LF instead of CRLF on windows
 238     setmode(fileno(stdout), O_BINARY);
 239     setmode(fileno(stderr), O_BINARY);
 240 #endif
 241 
 242     return run(argc, argv) == 0 ? 0 : 1;
 243 }