File: avoid.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 avoid [regular expressions...]
  27 
  28 Avoid/ignore lines which match any of the regexes given. When not given any
  29 regex, ignore all empty lines by default.
  30 */
  31 
  32 /*
  33 You can build this command-line app by running
  34 
  35 cc -Wall -s -O2 -march=native -mtune=native -flto -o ./avoid ./avoid.c
  36 */
  37 
  38 #include <regex.h>
  39 #include <stdbool.h>
  40 #include <stdint.h>
  41 #include <stdio.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <unistd.h>
  45 
  46 #ifdef _WIN32
  47 #include <fcntl.h>
  48 #include <windows.h>
  49 #endif
  50 
  51 #ifdef RED_ERRORS
  52 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  53 #ifdef __APPLE__
  54 #define ERROR_STYLE "\x1b[31m"
  55 #endif
  56 #define RESET_STYLE "\x1b[0m"
  57 #else
  58 #define ERROR_STYLE
  59 #define RESET_STYLE
  60 #endif
  61 
  62 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  63 
  64 #define NO_MATCHES 1
  65 #define BAD_ALLOC 2
  66 #define BAD_REGEX 3
  67 
  68 // flags is used to compile all regexes
  69 const int flags = REG_EXTENDED | REG_NEWLINE;
  70 
  71 // slice is a growable region of bytes in memory
  72 typedef struct slice {
  73     // ptr is the starting place of the region
  74     unsigned char* ptr;
  75 
  76     // cap is how many bytes the memory region has available
  77     size_t cap;
  78 } slice;
  79 
  80 // config just organizes most arguments given to the input-handling function
  81 typedef struct config {
  82     size_t nexprs;
  83     regex_t* exprs;
  84     slice* line;
  85     bool live_lines;
  86 } config;
  87 
  88 static inline bool match(const char* s, size_t nexprs, const regex_t* re) {
  89     regmatch_t match;
  90     for (size_t i = 0; i < nexprs; i++) {
  91         if (regexec(&re[i], s, sizeof(match), &match, 0) == 0) {
  92             return true;
  93         }
  94     }
  95     return false;
  96 }
  97 
  98 static inline bool emptyish(const unsigned char* s) {
  99     const unsigned char b = s[0];
 100     return b == 0 || b == '\n' || b == '\r';
 101 }
 102 
 103 size_t handle_reader(FILE* w, FILE* r, config cfg) {
 104     size_t matches = 0;
 105     slice* line = cfg.line;
 106 
 107     while (!feof(w)) {
 108         ssize_t len = getline((char**)&line->ptr, &line->cap, r);
 109         if (line->ptr == NULL) {
 110             fprintf(stderr, "\n");
 111             fprintf(stderr, ERROR_LINE("out of memory"));
 112             exit(BAD_ALLOC);
 113         }
 114 
 115         if (len < 0) {
 116             break;
 117         }
 118 
 119         // when not given regexes, just avoid all empty(ish) lines
 120         if (cfg.nexprs == 0 && emptyish(line->ptr)) {
 121             continue;
 122         }
 123 
 124         // avoid showing matching lines
 125         if (match((char*)line->ptr, cfg.nexprs, cfg.exprs)) {
 126             continue;
 127         }
 128         matches++;
 129 
 130         fwrite(line->ptr, 1, len, w);
 131         const bool has_lf = len >= 1 && line->ptr[len - 1] == '\n';
 132         if (!has_lf) {
 133             fputc('\n', w);
 134         }
 135         if (cfg.live_lines) {
 136             fflush(w);
 137         }
 138     }
 139 
 140     if (!cfg.live_lines && matches > 0) {
 141         fflush(w);
 142     }
 143     return matches;
 144 }
 145 
 146 // run returns the error code for the app
 147 int run(int argc, char** argv, bool live_lines) {
 148     slice line;
 149     line.cap = 32 * 1024;
 150     line.ptr = malloc(line.cap);
 151     if (line.ptr == NULL) {
 152         fprintf(stderr, ERROR_LINE("out of memory"));
 153         exit(BAD_ALLOC);
 154     }
 155 
 156     size_t nexprs = argc - 1;
 157     regex_t* exprs = NULL;
 158     if (nexprs > 0) {
 159         const size_t n = nexprs * sizeof(regex_t);
 160         exprs = malloc(n);
 161         if (exprs == NULL) {
 162             fprintf(stderr, ERROR_LINE("out of memory"));
 163             exit(BAD_ALLOC);
 164         }
 165         memset(exprs, 0, n);
 166     }
 167 
 168     // compile all arguments as regexes
 169     size_t errors = 0;
 170     for (size_t i = 1; i < argc; i++) {
 171         regex_t* expr = &exprs[i - 1];
 172         const int err = regcomp(expr, argv[i], flags);
 173         if (err == 0) {
 174             continue;
 175         }
 176 
 177         char msg[1024];
 178         regerror(err, expr, msg, sizeof(msg));
 179         fprintf(stderr, "%s\t%s\n", argv[i], msg);
 180         errors++;
 181     }
 182 
 183     if (errors > 0) {
 184         for (size_t i = 0; i < nexprs; i++) {
 185             regfree(&exprs[i]);
 186         }
 187         free(exprs);
 188         free(line.ptr);
 189         return BAD_REGEX;
 190     }
 191 
 192     // match lines from the standard input
 193     config cfg;
 194     cfg.line = &line;
 195     cfg.nexprs = nexprs;
 196     cfg.exprs = exprs;
 197     cfg.live_lines = live_lines;
 198     const size_t matches = handle_reader(stdout, stdin, cfg);
 199 
 200     for (size_t i = 0; i < nexprs; i++) {
 201         regfree(&exprs[i]);
 202     }
 203     free(exprs);
 204     free(line.ptr);
 205     return matches > 0 ? 0 : NO_MATCHES;
 206 }
 207 
 208 int main(int argc, char** argv) {
 209 #ifdef _WIN32
 210     setmode(fileno(stdin), O_BINARY);
 211     // ensure output lines end in LF instead of CRLF on windows
 212     setmode(fileno(stdout), O_BINARY);
 213     setmode(fileno(stderr), O_BINARY);
 214 #endif
 215 
 216     const bool live_lines = lseek(fileno(stdout), 0, SEEK_CUR) != 0;
 217     if (!live_lines) {
 218         setvbuf(stdout, NULL, _IOFBF, 0);
 219     }
 220     return run(argc, argv, live_lines);
 221 }