File: catl.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./catl ./catl.c
  29 */
  30 
  31 #include <stdbool.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 
  36 #ifdef _WIN32
  37 #include <fcntl.h>
  38 #include <windows.h>
  39 #endif
  40 
  41 // #ifndef RAW_LINES
  42 // #define FIX_LINES
  43 // #endif
  44 
// info is the help message, which main writes to the standard output when
// the first command-line argument is any of `-h`, `-help`, `--h`, or `--help`
const char* info = ""
"catl [filenames...]\n"
"\n"
"Concatenate lines from all the named sources given. The name `-` stands for\n"
"the standard input. When no names are given, the standard input is used by\n"
"default.\n"
"";

// no_line_memory_msg is the error message shown on the standard error when
// memory for the line buffer isn't available
const char* no_line_memory_msg = "can't get enough memory to read lines";
  54 
// slice is a growable region of bytes in memory; in this file it holds the
// line buffer which is allocated once by func run and handed to getline,
// which may reallocate it and update both its ptr and cap fields
typedef struct slice {
    // ptr is the starting place of the region; since it's given to getline
    // and later to free, it must come from malloc/realloc (or be NULL)
    unsigned char* ptr;

    // len is how many bytes are currently being used
    size_t len;

    // cap is how many bytes the memory region has available, which is the
    // buffer size given to (and possibly updated by) getline
    size_t cap;
} slice;
  66 
  67 #ifdef FIX_LINES
  68 bool starts_with_bom(const unsigned char* b, const size_t n) {
  69     return (n >= 3 && b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf);
  70 }
  71 
  72 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
  73 // CR-LF pairs into single LF bytes
  74 bool handle_reader(FILE* w, FILE* r, slice* line) {
  75     slice trimmed;
  76 
  77     for (size_t i = 0; !feof(w); i++) {
  78         ssize_t len = getline((char**)&line->ptr, &line->cap, r);
  79         if (len < 0) {
  80             break;
  81         }
  82 
  83         if (line->ptr == NULL) {
  84             putc('\n', w);
  85             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
  86             return false;
  87         }
  88 
  89         line->len = len;
  90         trimmed.ptr = line->ptr;
  91         trimmed.len = line->len;
  92 
  93         // get rid of leading UTF-8 BOM (byte-order mark) if 1st line has it
  94         if (i == 0 && starts_with_bom(trimmed.ptr, trimmed.len)) {
  95             trimmed.ptr += 3;
  96             trimmed.len -= 3;
  97             len = trimmed.len;
  98         }
  99 
 100         const unsigned char* p = trimmed.ptr;
 101         // get rid of trailing line-feeds and CRLF end-of-line byte-pairs
 102         if (len >= 2 && p[len - 2] == '\r' && p[len - 1] == '\n') {
 103             trimmed.len -= 2;
 104         } else if (len >= 1 && p[len - 1] == '\n') {
 105             trimmed.len--;
 106         }
 107 
 108         fwrite(trimmed.ptr, trimmed.len, 1, w);
 109         putc('\n', w);
 110         fflush(w);
 111     }
 112 
 113     return true;
 114 }
 115 #else
 116 // handle_reader skips leading UTF-8 BOMs (byte-order marks), and turns all
 117 // CR-LF pairs into single LF bytes
 118 bool handle_reader(FILE* w, FILE* r, slice* line) {
 119     while (!feof(w)) {
 120         ssize_t len = getline((char**)&line->ptr, &line->cap, r);
 121         if (len < 0) {
 122             break;
 123         }
 124 
 125         if (line->ptr == NULL) {
 126             putc('\n', w);
 127             fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 128             return false;
 129         }
 130 
 131         if (len >= 1 && line->ptr[len - 1] == '\n') {
 132             len--;
 133         }
 134 
 135         fwrite(line->ptr, len, 1, w);
 136         putc('\n', w);
 137         fflush(w);
 138     }
 139 
 140     return true;
 141 }
 142 #endif
 143 
 144 // handle_file handles data from the filename given; returns false only when
 145 // the file can't be opened
 146 bool handle_file(FILE* w, const char* fname, slice* line) {
 147     FILE* f = fopen(fname, "rb");
 148     if (f == NULL) {
 149         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", fname);
 150         return false;
 151     }
 152 
 153     const bool ok = handle_reader(w, f, line);
 154     fclose(f);
 155     return ok;
 156 }
 157 
 158 // run returns the number of errors
 159 int run(int argc, char** argv, FILE* w) {
 160     size_t dashes = 0;
 161     for (int i = 1; i < argc; i++) {
 162         if (argv[i][0] == '-' && argv[i][1] == 0) {
 163             dashes++;
 164         }
 165     }
 166 
 167     if (dashes > 1) {
 168         const char* m = "can't use the standard input (dash) more than once";
 169         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", m);
 170         return 1;
 171     }
 172 
 173     slice line;
 174     line.len = 0;
 175     line.cap = 32 * 1024;
 176     line.ptr = malloc(line.cap);
 177 
 178     if (line.ptr == NULL) {
 179         fprintf(stderr, "\x1b[31m%s\x1b[0m\n", no_line_memory_msg);
 180         return 1;
 181     }
 182 
 183     size_t errors = 0;
 184     for (int i = 1; i < argc && !feof(stdout) && line.ptr != NULL; i++) {
 185         if (argv[i][0] == '-' && argv[i][1] == 0) {
 186             if (!handle_reader(w, stdin, &line)) {
 187                 errors++;
 188             }
 189             continue;
 190         }
 191 
 192         if (!handle_file(w, argv[i], &line)) {
 193             errors++;
 194         }
 195     }
 196 
 197     // use stdin when not given any filepaths
 198     if (argc <= 1) {
 199         if (!handle_reader(w, stdin, &line)) {
 200             errors++;
 201         }
 202     }
 203 
 204     free(line.ptr);
 205     return errors;
 206 }
 207 
 208 int main(int argc, char** argv) {
 209 #ifdef _WIN32
 210     setmode(fileno(stdin), O_BINARY);
 211     // ensure output lines end in LF instead of CRLF on windows
 212     setmode(fileno(stdout), O_BINARY);
 213     setmode(fileno(stderr), O_BINARY);
 214 #endif
 215 
 216     if (argc > 1) {
 217         if (
 218             strcmp(argv[1], "-h") == 0 ||
 219             strcmp(argv[1], "-help") == 0 ||
 220             strcmp(argv[1], "--h") == 0 ||
 221             strcmp(argv[1], "--help") == 0
 222         ) {
 223             fprintf(stdout, "%s", info);
 224             return 0;
 225         }
 226     }
 227 
 228     return run(argc, argv, stdout) == 0 ? 0 : 1;
 229 }