File: minibox.cpp
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Minibox is a busybox/toybox-like app which has several small tools in it. You
  27 simply run it with a specific tool to run as the first argument, without any
  28 dashes in front of it. Running minibox without arguments will show a help
  29 message, which in turn shows all the tools/names you can use.
  30 
  31 You can build the minibox app by running the command below, which should take
  32 several seconds to complete:
  33 
  34 g++ -Wall -s -O2 -o minibox minibox.cpp
  35 */
  36 
// default tab-stop width for tool `blow`
const auto default_tabstop = 4;
// default number of lines between the extra empty lines tools `bh` and `bl` add
const auto default_breathe_period = 5;

// ANSI escape sequences picking foreground colors: tool `gbm` uses them to
// style lines, and gray is also used by tool `nn` to alternate digit groups
const char* gray_style = "\x1b[38;5;248m";
// const char* green_style = "\x1b[38;5;26m";
const char* green_style = "\x1b[38;5;29m";
const char* red_style = "\x1b[38;5;1m";

// ANSI escape sequence styling the error messages sent to stderr
// const char* error_style = "\x1b[41m\x1b[97m";
const char* error_style = "\x1b[31m";
  47 
  48 #include <algorithm>
  49 #include <chrono>
  50 #include <cmath>
  51 #include <cstdint>
  52 #include <cstring>
  53 #include <iostream>
  54 #include <map>
  55 #include <regex>
  56 #include <set>
  57 #include <sstream>
  58 #include <string>
  59 #include <thread>
  60 
  61 using namespace std;
  62 
  63 map<string, string> name2help = {
  64     {"args", "args [args...]\n\nShow all arguments given, one per output line"},
  65     {
  66         "avoid",
  67         "avoid [regexes...]\n\n"
  68         "Ignore all lines case-sensitively matching any of the JS-style regular\n"
  69         "expressions given",
  70     },
  71     {"begin", "begin [args...]\n\nPrecede input lines with the args/lines given"},
  72     {"begintsv", "begintsv [items...]\n\nPrecede input lines with a single TSV line"},
  73     {"bh", "bh [every = 5]\n\nAdd an extra emtpy line after the 1st, then every few"},
  74     {"bl", "bl [every = 5]\n\nAdd an extra emtpy line every few"},
  75     {"blow", "blow [tabstop = 4]\n\nBlow/expand tabs into runs of spaces"},
  76     {"book", "book [page height]\n\nLay out lines into 2 columns, just like a book"},
  77     {"choplf", "choplf\n\nIgnore the last line-feed from the input, if present"},
  78     {"delay", "delay [seconds = 1.0]\n\nWait some time before emitting each line"},
  79     {
  80         "drop",
  81         "drop [what...]\n\n"
  82         "Ignore all occurrences of all the regular expressions given, in the\n"
  83         "order given",
  84     },
  85     {"first", "first [lines = 1]\n\nLimit output up to the first few input lines"},
  86     {"gbm", "gbm [good...] [bad...] [meh...]\n\nANSI-style lines by regex-matches"},
  87     {"help", "help\n\nShow a help message about this app"},
  88     {
  89         "iavoid",
  90         "iavoid [regexes...]\n\n"
  91         "Ignore all lines case-insensitively matching any of the JS-style regular\n"
  92         "expressions given",
  93     },
  94     {
  95         "idrop",
  96         "idrop [what...]\n\n"
  97         "Ignore all case-insensitively-matched occurrences of all the regular\n"
  98         "expressions given, in the order given",
  99     },
 100     {
 101         "imatch",
 102         "imatch [regexes...]\n\n"
 103         "Only keep lines case-insensitively matching any of the JS-style regular\n"
 104         "expressions given",
 105     },
 106     {"last", "last [lines = 1]\n\nLimit output up to the last few input lines"},
 107     {"leak", "leak\n\nHelp debug pipes, by copying input lines to stderr and stdout"},
 108     {
 109         "lines",
 110         "lines\n\n"
 111         "Ignore trailing carriage returns, making sure the last line ends\n"
 112         "with a line-feed, whether the input ended with one or not",
 113     },
 114     {
 115         "lineup",
 116         "lineup [max = 0]\n\n"
 117         "Join lines into TSV lines up to the number of items given; when\n"
 118         "not given a max value, or when given 0 or a negative value, join\n"
 119         "all input lines into a single TSV output line",
 120     },
 121     {"links", "links\n\nFind all hyperlinks-like substrings from the input"},
 122     {"lower", "lower\n\nASCII-lowercase all input lines"},
 123     {
 124         "match",
 125         "match [regexes...]\n\n"
 126         "Only keep lines case-sensitively matching any of the JS-style regular\n"
 127         "expressions given",
 128     },
 129     {"n", "n [start = 1]\n\nNumber all input lines"},
 130     {"nn", "nn\n\nNice Numbers makes long numbers easier to read via ANSI-styles"},
 131     {"nothing", "nothing\n\nRead/write nothing, effectively doing nothing"},
 132     {"plain", "plain\n\nIgnore all ANSI-style sequences"},
 133     {"primes", "primes [count]\n\nShow the first few prime numbers"},
 134     {"size", "size\n\nCount input bytes"},
 135     {"skip", "skip [lines = 1]\n\nIgnore the first few input lines"},
 136     {"skiplast", "skiplast [lines = 1]\n\nIgnore the last few input lines"},
 137     {"trim", "trim\n\nIgnore leading/trailing spaces on all lines"},
 138     {"trimend", "trimend\n\nIgnore trailing spaces on all lines"},
 139     {"unique", "unique\n\nAvoid emitting the same input line more than once"},
 140 };
 141 
 142 int make_regexes(int argc, char** argv, vector<regex>& res, bool case_sens) {
 143     size_t errors = 0;
 144     const auto dialect = regex_constants::ECMAScript;
 145     const auto options = dialect | (case_sens ? 0 : regex_constants::icase);
 146 
 147     for (int i = 1; i < argc; i++) {
 148         try {
 149             regex expr(argv[i], options);
 150             res.push_back(expr);
 151         } catch (regex_error& e) {
 152             const auto es = error_style;
 153             cerr << es << e.code() << ": " << e.what() << "\x1b[0m" << endl;
 154             errors++;
 155         }
 156     }
 157 
 158     return errors;
 159 }
 160 
 161 uint64_t count_runes(string& s) {
 162     uint64_t n = 0;
 163     for (auto c : s) {
 164         n += ((c & 0xc0) != 0x80);
 165     }
 166     return n;
 167 }
 168 
 169 void de_bom(string &s) {
 170     // s.starts_with("\xef\xbb\xbf")
 171     if (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
 172         s.erase(0, 3);
 173     }
 174 }
 175 
 176 void no_cr(string &s) {
 177     s.erase(remove(s.begin(), s.end(), '\r'), s.end());
 178 }
 179 
 180 istream& get_unix_line(istream& src, string& dest) {
 181     getline(src, dest);
 182     no_cr(dest);
 183     return src;
 184 }
 185 
 186 void left_trim(string &s) {
 187     auto trimmed = find_if(s.begin(), s.end(), [](char c) {
 188         return !isspace<char>(c, locale::classic());
 189     });
 190     s.erase(s.begin(), trimmed);
 191 }
 192 
 193 bool matches_any(string& s, vector<regex>& expressions) {
 194     for (regex e : expressions) {
 195         if (regex_search(s, e)) {
 196             return true;
 197         }
 198     }
 199     return false;
 200 }
 201 
 202 void remove_all(string& dest, char* ignore) {
 203     auto len = strlen(ignore);
 204     auto pos = string::npos;
 205     while (1) {
 206         pos = dest.find(ignore);
 207         if (pos == string::npos) {
 208             return;
 209         }
 210         dest.erase(pos, len);
 211     }
 212 }
 213 
 214 void right_trim(string &s) {
 215     auto trimmed = find_if(s.rbegin(), s.rend(), [](char c) {
 216         return !isspace<char>(c, locale::classic());
 217     });
 218     s.erase(trimmed.base(), s.end());
 219 }
 220 
 221 void show_error(string msg) {
 222     cerr << error_style << msg << "\x1b[0m" << endl;
 223 }
 224 
 225 inline int no_args_expected() {
 226     show_error("no args expected");
 227     return 1;
 228 }
 229 
 230 /*
 231 The next few funcs allow changing the auto-flushing/line-buffering stdout
 232 behavior across tools.
 233 */
 234 
 235 inline void output_line(const char* s) {
 236     cout << s << endl;
 237 }
 238 
 239 inline void output_line(uint64_t n) {
 240     cout << n << endl;
 241 }
 242 
 243 inline void output_line(const string& s) {
 244     cout << s << endl;
 245 }
 246 
 247 inline void output_styled_line(const string& style, string &line) {
 248     cout << style << line << "\x1b[0m" << endl;
 249 }
 250 
 251 inline void end_output_line() {
 252     cout << endl;
 253 }
 254 
 255 // args emits each string given to it on its own output line, ignoring any
 256 // input
 257 int args(int argc, char** argv) {
 258     for (int i = 1; i < argc; i++) {
 259         output_line(argv[i]);
 260     }
 261     return 0;
 262 }
 263 
 264 int avoid(int argc, char** argv) {
 265     vector<regex> expressions;
 266     size_t errors = make_regexes(argc, argv, expressions, true);
 267     if (errors > 0) {
 268         return 1;
 269     }
 270 
 271     string line;
 272     while (getline(cin, line)) {
 273         if (!matches_any(line, expressions)) {
 274             output_line(line);
 275             if (cout.eof()) {
 276                 return 0;
 277             }
 278         }
 279     }
 280     return 0;
 281 }
 282 
 283 // begin emits the few strings given as their own lines, before emitting back
 284 // all input lines
 285 int begin(int argc, char** argv) {
 286     for (int i = 1; i < argc; i++) {
 287         output_line(argv[i]);
 288     }
 289 
 290     string line;
 291     while (getline(cin, line)) {
 292         output_line(line);
 293         if (cout.eof()) {
 294             return 0;
 295         }
 296     }
 297     return 0;
 298 }
 299 
 300 // begin_tsv emits a line of tab-separated values (TSV), before emitting back
 301 // all input lines
 302 int begin_tsv(int argc, char** argv) {
 303     for (int i = 1; i < argc; i++) {
 304         if (i > 1) {
 305             cout << '\t';
 306         }
 307         cout << argv[i];
 308     }
 309     if (argc > 1) {
 310         end_output_line();
 311     }
 312 
 313     string line;
 314     while (getline(cin, line)) {
 315         output_line(line);
 316         if (cout.eof()) {
 317             return 0;
 318         }
 319     }
 320     return 0;
 321 }
 322 
 323 // bh (breathe header) adds an extra empty line after the first/header line,
 324 // and then adds a single extra empty line every few
 325 int bh(int argc, char** argv) {
 326     string line;
 327     int64_t every = default_breathe_period;
 328 
 329     if (argc > 1) {
 330         try {
 331             every = stol(argv[1]);
 332         } catch (...) { }
 333     }
 334 
 335     if (!getline(cin, line)) {
 336         return 0;
 337     }
 338     output_line(line);
 339 
 340     auto i = 0;
 341     while (getline(cin, line)) {
 342         if (every > 0 && i % every == 0) {
 343             end_output_line();
 344         }
 345 
 346         output_line(line);
 347         if (cout.eof()) {
 348             return 0;
 349         }
 350 
 351         i++;
 352     }
 353 
 354     return 0;
 355 }
 356 
 357 
 358 // bl (breathe lines) adds single extra empty lines every few
 359 int bl(int argc, char** argv) {
 360     string line;
 361     int64_t every = default_breathe_period;
 362 
 363     if (argc > 1) {
 364         try {
 365             every = stol(argv[1]);
 366         } catch (...) { }
 367     }
 368 
 369     auto i = 0;
 370     while (getline(cin, line)) {
 371         if (every > 0 && i % every == 0 && i > 0) {
 372             end_output_line();
 373         }
 374 
 375         output_line(line);
 376         if (cout.eof()) {
 377             return 0;
 378         }
 379 
 380         i++;
 381     }
 382 
 383     return 0;
 384 }
 385 
 386 // blow blows/expands tabs into runs of spaces
 387 int blow(int argc, char** argv) {
 388     string line;
 389     int64_t tabstop = default_tabstop;
 390 
 391     if (argc > 1) {
 392         try {
 393             tabstop = stol(argv[1]);
 394         } catch (...) { }
 395     }
 396 
 397     if (tabstop < 1) {
 398         while (getline(cin, line)) {
 399             output_line(line);
 400             if (cout.eof()) {
 401                 return 0;
 402             }
 403         }
 404         return 0;
 405     }
 406 
 407     while (getline(cin, line)) {
 408         int64_t i = 0;
 409 
 410         for (auto c : line) {
 411             if (c != '\t') {
 412                 i += ((c & 0xc0) != 0x80);
 413                 cout << c;
 414                 continue;
 415             }
 416 
 417             auto spaces = tabstop - i % tabstop;
 418             i += spaces;
 419             for (auto j = 0; j < spaces; j++) {
 420                 cout << ' ';
 421             }
 422         }
 423 
 424         end_output_line();
 425     }
 426 
 427     return 0;
 428 }
 429 
 430 // book lays out input lines on 2 columns, the same way books do it
 431 int book(int argc, char** argv) {
 432     if (argc != 2) {
 433         show_error("expected 1 argument");
 434         return 1;
 435     }
 436 
 437     uint64_t page_height = 0;
 438     try {
 439         page_height = stol(argv[1]);
 440     } catch (...) {
 441         show_error("invalid page-height");
 442         return 1;
 443     }
 444 
 445     if (page_height < 2) {
 446         show_error("page height can't be less than 2");
 447         return 1;
 448     }
 449 
 450     string line;
 451     vector<string> lines;
 452     while (getline(cin, line)) {
 453         lines.push_back(line);
 454     }
 455 
 456     auto nlines = lines.size();
 457     auto inner = page_height - 1;
 458 
 459     auto side = 0;
 460     uint64_t widths[2] = {0, 0};
 461 
 462     for (uint64_t i = 0; i < nlines; i += inner) {
 463         for (uint64_t j = i; j < nlines && j < i + inner; j++) {
 464             auto w = count_runes(lines[j]);
 465             if (widths[side] < w) {
 466                 widths[side] = w;
 467             }
 468         }
 469 
 470         side = 1 - side;
 471     }
 472 
 473     for (uint64_t i = 0; i < nlines; i += 2*inner) {
 474         if (i > 0) {
 475             auto rest = (widths[1] > 0) ? (3 + widths[1]) : 2;
 476             auto maxw = widths[0] + rest;
 477             for (uint64_t j = 0; j < maxw; j++) {
 478                 // cout << '-';
 479                 cout << "·";
 480             }
 481             end_output_line();
 482         }
 483 
 484         for (auto j = i; j < nlines && j < i + inner; j++) {
 485             auto l = lines[j];
 486             auto w = count_runes(l);
 487             auto r = (j + inner < nlines) ? lines[j + inner] : string();
 488 
 489             cout << l;
 490             if (widths[0] > w) {
 491                 for (uint64_t j = 0; j < widths[0] - w; j++) {
 492                     cout << ' ';
 493                 }
 494             }
 495 
 496             w = r.size();
 497             if (w == 0) {
 498                 // cout << " |";
 499                 cout << "";
 500             } else {
 501                 // cout << " | ";
 502                 cout << "";
 503             }
 504 
 505             output_line(r);
 506             if (cout.eof()) {
 507                 return 0;
 508             }
 509         }
 510     }
 511 
 512     return 0;
 513 }
 514 
 515 // chop_lf ignore the last line-feed from the input, if present
 516 int chop_lf(int argc, char**) {
 517     if (argc > 1) {
 518         return no_args_expected();
 519     }
 520 
 521     auto i = 0;
 522     string line;
 523     while (getline(cin, line)) {
 524         if (i > 0) {
 525             end_output_line();
 526         }
 527         cout << line;
 528         i++;
 529     }
 530 
 531     return 0;
 532 }
 533 
 534 // delay waits the given number of seconds before emitting back each line
 535 // from the input
 536 int delay(int argc, char** argv) {
 537     double seconds = 1.0;
 538 
 539     if (argc > 1) {
 540         try {
 541             seconds = stof(argv[1]);
 542         } catch (...) { }
 543     }
 544 
 545     if (isnan(seconds) || isinf(seconds)) {
 546         show_error("invalid number");
 547         return 1;
 548     }
 549 
 550     if (seconds < 0) {
 551         seconds = 0;
 552     }
 553 
 554     string line;
 555     auto ms = (int)(1000 * seconds);
 556 
 557     while (getline(cin, line)) {
 558         this_thread::sleep_for(chrono::milliseconds(ms));
 559 
 560         output_line(line);
 561         if (cout.eof()) {
 562             return 0;
 563         }
 564     }
 565 
 566     return 0;
 567 }
 568 
 569 // drop ignores all occurrences of all the regular expressions given, in the
 570 // order given; regex matches are case-sensitive
 571 int drop(int argc, char** argv) {
 572     vector<regex> expressions;
 573     size_t errors = make_regexes(argc, argv, expressions, true);
 574     if (errors > 0) {
 575         return 1;
 576     }
 577 
 578     string line;
 579     while (getline(cin, line)) {
 580         for (regex e : expressions) {
 581             if (!regex_search(line, e)) {
 582                 continue;
 583             }
 584 
 585             string s = regex_replace(line, e, "");
 586             line.clear();
 587             line.append(s);
 588         }
 589 
 590         output_line(line);
 591         if (cout.eof()) {
 592             return 0;
 593         }
 594     }
 595 
 596     return 0;
 597 }
 598 
 599 // first limits input up to its first few lines
 600 int first(int argc, char** argv) {
 601     string line;
 602     int64_t max = 1;
 603 
 604     if (argc > 1) {
 605         try {
 606             max = stol(argv[1]);
 607         } catch (...) { }
 608     }
 609 
 610     if (max < 1) {
 611         return 0;
 612     }
 613 
 614     auto i = 0;
 615     while (getline(cin, line)) {
 616         output_line(line);
 617         if (cout.eof()) {
 618             return 0;
 619         }
 620         i++;
 621 
 622         if (i >= max) {
 623             return 0;
 624         }
 625     }
 626 
 627     return 0;
 628 }
 629 
 630 // gbm (good, bad, meh) ANSI-styles/colors lines by regex-matching
 631 int gbm(int argc, char** argv) {
 632     string line;
 633     regex good, bad, meh;
 634 
 635     const string good_style = green_style;
 636     const string bad_style = red_style;
 637     const string meh_style = gray_style;
 638 
 639     switch (argc) {
 640     case 2:
 641         good = regex(argv[1]);
 642 
 643         while (getline(cin, line)) {
 644             if (cout.eof()) {
 645                 return 0;
 646             }
 647 
 648             if (regex_search(line, good)) {
 649                 output_styled_line(good_style, line);
 650                 continue;
 651             }
 652 
 653             output_line(line);
 654         }
 655 
 656         return 0;
 657 
 658     case 3:
 659         good = regex(argv[1]);
 660         bad = regex(argv[2]);
 661 
 662         while (getline(cin, line)) {
 663             if (cout.eof()) {
 664                 return 0;
 665             }
 666 
 667             if (regex_search(line, good)) {
 668                 output_styled_line(good_style, line);
 669                 continue;
 670             }
 671             if (regex_search(line, bad)) {
 672                 output_styled_line(bad_style, line);
 673                 continue;
 674             }
 675 
 676             output_line(line);
 677         }
 678 
 679         return 0;
 680 
 681     case 4:
 682         good = regex(argv[1]);
 683         bad = regex(argv[2]);
 684         meh = regex(argv[3]);
 685 
 686         while (getline(cin, line)) {
 687             if (cout.eof()) {
 688                 return 0;
 689             }
 690 
 691             if (regex_search(line, good)) {
 692                 output_styled_line(good_style, line);
 693                 continue;
 694             }
 695             if (regex_search(line, bad)) {
 696                 output_styled_line(bad_style, line);
 697                 continue;
 698             }
 699             if (regex_search(line, meh)) {
 700                 output_styled_line(meh_style, line);
 701                 continue;
 702             }
 703 
 704             output_line(line);
 705         }
 706 
 707         return 0;
 708 
 709     default:
 710         show_error("only up to 3 args are supported");
 711         return 1;
 712     }
 713 }
 714 
 715 extern map<string, string> tool_aliases;
 716 extern map<string, int(*)(int, char**)> name2tool;
 717 
 718 // help_general handles the no-arguments case for func help
 719 int help_general(int, char**) {
 720     output_line("minibox [tool] [args...]");
 721     end_output_line();
 722     output_line("This is a busybox/toybox-like app which has several small");
 723     output_line("tools in it.");
 724 
 725     vector<string> keys;
 726     for (auto kv : tool_aliases) {
 727         keys.push_back(kv.first);
 728     }
 729     sort(keys.begin(), keys.end());
 730 
 731     end_output_line();
 732     end_output_line();
 733     output_line("Aliases");
 734     end_output_line();
 735 
 736     for (auto k : keys) {
 737         cout << k << '\t' << tool_aliases[k] << endl;
 738     }
 739 
 740     keys.clear();
 741     for (auto kv : name2tool) {
 742         keys.push_back(kv.first);
 743     }
 744     sort(keys.begin(), keys.end());
 745 
 746     end_output_line();
 747     end_output_line();
 748     output_line("Tools");
 749 
 750     for (auto k : keys) {
 751         end_output_line();
 752         end_output_line();
 753         output_line(name2help[k]);
 754         if (cout.eof()) {
 755             return 0;
 756         }
 757     }
 758 
 759     return 0;
 760 }
 761 
 762 int help(int argc, char** argv) {
 763     if (argc <= 1) {
 764         return help_general(argc, argv);
 765     }
 766 
 767     auto nerr = 0;
 768 
 769     for (auto i = 1; i < argc; i++) {
 770         auto name = argv[i];
 771         if (name2help.find(name) == name2help.end()) {
 772             stringstream msg;
 773             msg << name << ": no help message found";
 774             show_error(msg.str());
 775             continue;
 776         }
 777 
 778         if (i > 1) {
 779             end_output_line();
 780             end_output_line();
 781         }
 782         output_line(name2help[name]);
 783     }
 784 
 785     if (nerr > 0) {
 786         return 1;
 787     }
 788     return 0;
 789 }
 790 
 791 int iavoid(int argc, char** argv) {
 792     vector<regex> expressions;
 793     size_t errors = make_regexes(argc, argv, expressions, false);
 794     if (errors > 0) {
 795         return 1;
 796     }
 797 
 798     string line;
 799     while (getline(cin, line)) {
 800         if (!matches_any(line, expressions)) {
 801             output_line(line);
 802             if (cout.eof()) {
 803                 return 0;
 804             }
 805         }
 806     }
 807     return 0;
 808 }
 809 
 810 // idrop ignores all case-insensitively-matched occurrences of all the regular
 811 // expressions given, in the order given
 812 int idrop(int argc, char** argv) {
 813     vector<regex> expressions;
 814     size_t errors = make_regexes(argc, argv, expressions, false);
 815     if (errors > 0) {
 816         return 1;
 817     }
 818 
 819     string line;
 820     while (getline(cin, line)) {
 821         for (regex e : expressions) {
 822             if (!regex_search(line, e)) {
 823                 continue;
 824             }
 825 
 826             string s = regex_replace(line, e, "");
 827             line.clear();
 828             line.append(s);
 829         }
 830 
 831         output_line(line);
 832         if (cout.eof()) {
 833             return 0;
 834         }
 835     }
 836 
 837     return 0;
 838 }
 839 
 840 int imatch(int argc, char** argv) {
 841     if (argc == 1) {
 842         return 0;
 843     }
 844 
 845     vector<regex> expressions;
 846     size_t errors = make_regexes(argc, argv, expressions, false);
 847     if (errors > 0) {
 848         return 1;
 849     }
 850 
 851     string line;
 852     while (getline(cin, line)) {
 853         if (matches_any(line, expressions)) {
 854             output_line(line);
 855             if (cout.eof()) {
 856                 return 0;
 857             }
 858         }
 859     }
 860     return 0;
 861 }
 862 
 863 // last limits input up to its last few lines
 864 int last(int argc, char** argv) {
 865     string line;
 866     vector<string> latest;
 867     int64_t max = 1;
 868 
 869     if (argc > 1) {
 870         try {
 871             max = stol(argv[1]);
 872         } catch (...) { }
 873     }
 874 
 875     if (max < 1) {
 876         return 0;
 877     }
 878 
 879     size_t i = 0;
 880     while (getline(cin, line)) {
 881         if ((int64_t)latest.size() < max) {
 882             latest.push_back(line);
 883         } else {
 884             latest[i] = line;
 885         }
 886         i = (i + 1) % max;
 887     }
 888 
 889     for (size_t j = i; j < latest.size(); j++) {
 890         output_line(latest[j]);
 891         if (cout.eof()) {
 892             return 0;
 893         }
 894     }
 895     for (size_t j = 0; j < i; j++) {
 896         output_line(latest[j]);
 897         if (cout.eof()) {
 898             return 0;
 899         }
 900     }
 901     return 0;
 902 }
 903 
 904 // leak helps debug pipes, by copying all input lines both to stderr and to
 905 // stdout
 906 int leak(int argc, char**) {
 907     if (argc > 1) {
 908         return no_args_expected();
 909     }
 910 
 911     string line;
 912     while (getline(cin, line)) {
 913         cerr << line << endl;
 914         output_line(line);
 915         if (cout.eof()) {
 916             return 0;
 917         }
 918     }
 919     return 0;
 920 }
 921 
 922 // lines ignores trailing carriage-returns from input lines, and ensures the
 923 // last line ends with a line-feed, whether the input had that or not
 924 int lines(int argc, char**) {
 925     if (argc > 1) {
 926         return no_args_expected();
 927     }
 928 
 929     string line;
 930     if (!get_unix_line(cin, line)) {
 931         return 0;
 932     }
 933     de_bom(line);
 934     output_line(line);
 935 
 936     while (get_unix_line(cin, line)) {
 937         output_line(line);
 938         if (cout.eof()) {
 939             return 0;
 940         }
 941     }
 942     return 0;
 943 }
 944 
 945 // line_up joins input lines via tabs, up to the number given: whenever that
 946 // number is exceeded, a new output line starts; when not given a number, or
 947 // when that number is 0 or negative, all input lines are tab-joined into a
 948 // single output line
 949 int line_up(int argc, char** argv) {
 950     string line;
 951     int64_t max = 0;
 952 
 953     if (argc > 1) {
 954         try {
 955             max = stol(argv[1]);
 956         } catch (...) { }
 957     }
 958 
 959     auto i = 0;
 960     while (getline(cin, line)) {
 961         if (i >= max && max > 0) {
 962             i = 0;
 963             end_output_line();
 964         } else if (i > 0) {
 965             cout << '\t';
 966             if (cout.eof()) {
 967                 return 0;
 968             }
 969         }
 970 
 971         cout << line;
 972         i++;
 973     }
 974 
 975     if (i > 0) {
 976         end_output_line();
 977     }
 978     return 0;
 979 }
 980 
 981 // links gets all hyperlink-type substrings from the input, each match shown
 982 // on its own output line
 983 int links(int argc, char**) {
 984     if (argc > 1) {
 985         return no_args_expected();
 986     }
 987 
 988     string line;
 989     auto pat = regex("https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*");
 990 
 991     while (getline(cin, line)) {
 992         auto iter = sregex_iterator(line.begin(), line.end(), pat);
 993         for (; iter != sregex_iterator(); iter++) {
 994             output_line(iter->str());
 995             if (cout.eof()) {
 996                 return 0;
 997             }
 998         }
 999     }
1000 
1001     return 0;
1002 }
1003 
1004 // lower ASCII-lowercases all symbols in all lines
1005 int lower(int argc, char**) {
1006     if (argc > 1) {
1007         return no_args_expected();
1008     }
1009 
1010     string line;
1011     while (getline(cin, line)) {
1012         transform(line.begin(), line.end(), line.begin(), [](char c) {
1013             return tolower(c);
1014         });
1015 
1016         output_line(line);
1017         if (cout.eof()) {
1018             return 0;
1019         }
1020     }
1021 
1022     return 0;
1023 }
1024 
1025 int match(int argc, char** argv) {
1026     if (argc == 1) {
1027         return 0;
1028     }
1029 
1030     vector<regex> expressions;
1031     size_t errors = make_regexes(argc, argv, expressions, true);
1032     if (errors > 0) {
1033         return 1;
1034     }
1035 
1036     string line;
1037     while (getline(cin, line)) {
1038         if (matches_any(line, expressions)) {
1039             output_line(line);
1040             if (cout.eof()) {
1041                 return 0;
1042             }
1043         }
1044     }
1045     return 0;
1046 }
1047 
// n numbers lines using the optional starting counter given, which is 1 by
// default; each output line starts with the current counter, followed by a
// tab, ending with the original input line. Invalid starting counters are
// ignored in favor of the default. The result is always 0.
int n(int argc, char** argv) {
    string line;
    int64_t counter = 1;

    if (argc > 1) {
        try {
            counter = stol(argv[1]);
        } catch (...) {
            // keep the default
        }
    }

    while (getline(cin, line)) {
        cout << counter << '\t' << line << endl;
        // stop early when nothing more can be written, as other tools do
        if (!cout) {
            return 0;
        }
        counter++;
    }

    return 0;
}
1068 
1069 // style_digits helps func nn do its job
1070 void style_digits(string digits) {
1071     auto l = digits.length();
1072     if (l < 4) {
1073         cout << digits;
1074         return;
1075     }
1076 
1077     auto alt = false;
1078     auto n = 3 - (l % 3);
1079 
1080     for (auto c : digits) {
1081         cout << c;
1082         n++;
1083         if (n % 3 == 0) {
1084             alt = !alt;
1085             cout << (alt ? gray_style : "\x1b[0m");
1086         }
1087     }
1088 
1089     if (alt) {
1090         cout << "\x1b[0m";
1091     }
1092 }
1093 
1094 // nn stands for `nice numbers`, alternating ANSI-styles for long-enough runs
1095 // of digits, the result being easier to read/parse visually, especially on
1096 // output full of such long numbers, such as with tables
1097 int nn(int argc, char**) {
1098     if (argc > 1) {
1099         return no_args_expected();
1100     }
1101 
1102     string line;
1103     stringstream digits;
1104     auto in_digits = false;
1105 
1106     while (getline(cin, line)) {
1107         for (auto c : line) {
1108             auto is_digit = '0' <= c && c <= '9';
1109 
1110             if (in_digits) {
1111                 if (!is_digit) {
1112                     in_digits = false;
1113                     style_digits(digits.str());
1114                     digits.str(string());
1115                     cout << c;
1116                 } else{
1117                     digits << c;
1118                 }
1119                 continue;
1120             }
1121 
1122             if (is_digit) {
1123                 in_digits = true;
1124                 digits << c;
1125                 continue;
1126             }
1127 
1128             cout << c;
1129         }
1130 
1131         if (in_digits) {
1132             in_digits = false;
1133             style_digits(digits.str());
1134             digits.str(string());
1135         }
1136 
1137         end_output_line();
1138         if (cout.eof()) {
1139             return 0;
1140         }
1141     }
1142 
1143     return 0;
1144 }
1145 
1146 // nothing reads nothing and writes, effectively doing nothing
1147 int nothing(int argc, char**) {
1148     if (argc > 1) {
1149         return no_args_expected();
1150     }
1151     return 0;
1152 }
1153 
1154 // plain ignores all ANSI-style sequences, leaving actual `plain` plain-text
1155 int plain(int argc, char**) {
1156     if (argc > 1) {
1157         return no_args_expected();
1158     }
1159 
1160     string line;
1161 
1162     while (getline(cin, line)) {
1163         int state = 0;
1164         for (auto c : line) {
1165             switch (state) {
1166                 case 0:
1167                     if (c == '\x1b') {
1168                         state = 1;
1169                     } else {
1170                         cout << c;
1171                     }
1172                     break;
1173 
1174                 case 1:
1175                     if (c == '[') {
1176                         state = 2;
1177                     } else {
1178                         cout << '\x1b';
1179                         cout << c;
1180                         state = 1;
1181                     }
1182                     break;
1183 
1184                 case 2:
1185                     if ('A' <= c && c <= 'Z') {
1186                         state = 0;
1187                     } else if ('a' <= c && c <= 'z') {
1188                         state = 0;
1189                     }
1190                     break;
1191             }
1192         }
1193 
1194         end_output_line();
1195         if (cout.eof()) {
1196             return 0;
1197         }
1198     }
1199 
1200     return 0;
1201 }
1202 
1203 // primes shows the first few prime numbers, one per output line
1204 int primes(int argc, char** argv) {
1205     uint64_t count = 1000000;
1206 
1207     if (argc > 1) {
1208         try {
1209             count = stol(argv[1]);
1210         } catch (...) { }
1211     }
1212 
1213     for (uint64_t n = 3; count > 0; n += 2) {
1214         uint64_t max = sqrt((double)n);
1215         for (uint64_t div = 3; div <= max; div += 2) {
1216             if (n % div == 0) {
1217                 goto skip;
1218             }
1219         }
1220 
1221         output_line(n);
1222         if (cout.eof()) {
1223             return 0;
1224         }
1225         count--;
1226 
1227         skip:;
1228     }
1229 
1230     return 0;
1231 }
1232 
1233 // skip ignores up to the given number of input lines, then emitting the rest
1234 int skip(int argc, char** argv) {
1235     string line;
1236     int64_t skip = 1;
1237 
1238     if (argc > 1) {
1239         try {
1240             skip = stol(argv[1]);
1241         } catch (...) { }
1242     }
1243 
1244     auto i = 0;
1245     while (getline(cin, line)) {
1246         if (i < skip) {
1247             i++;
1248             continue;
1249         }
1250 
1251         output_line(line);
1252         if (cout.eof()) {
1253             return 0;
1254         }
1255         i++;
1256     }
1257 
1258     return 0;
1259 }
1260 
1261 // skip_last emits all but the last few input lines
1262 int skip_last(int argc, char** argv) {
1263     string line;
1264     vector<string> latest;
1265     int64_t max = 1;
1266 
1267     if (argc > 1) {
1268         try {
1269             max = stol(argv[1]);
1270         } catch (...) { }
1271     }
1272 
1273     if (max < 1) {
1274         while (getline(cin, line)) {
1275             output_line(line);
1276             if (cout.eof()) {
1277                 return 0;
1278             }
1279         }
1280         return 0;
1281     }
1282 
1283     size_t i = 0;
1284     while (getline(cin, line)) {
1285         if ((int64_t)latest.size() < max) {
1286             latest.push_back(line);
1287         } else {
1288             output_line(latest[i]);
1289             if (cout.eof()) {
1290                 return 0;
1291             }
1292             latest[i] = line;
1293         }
1294 
1295         i = (i + 1) % max;
1296     }
1297 
1298     return 0;
1299 }
1300 
1301 int size(int argc, char**) {
1302     if (argc > 1) {
1303         return no_args_expected();
1304     }
1305 
1306     uint64_t n = 0;
1307     freopen(0, "rb", stdin);
1308     while (getchar_unlocked() != EOF) {
1309         n++;
1310     }
1311     output_line(n);
1312     return 0;
1313 }
1314 
1315 // teletype simulates the cadence of old teletype machines
1316 int teletype(int argc, char**) {
1317     if (argc > 1) {
1318         return no_args_expected();
1319     }
1320 
1321     string line;
1322     while (getline(cin, line)) {
1323         auto l = line.size();
1324         if (l == 0) {
1325             end_output_line();
1326             continue;
1327         }
1328 
1329         end_output_line();
1330     }
1331 
1332     return 0;
1333 }
1334 
1335 // trim ignores leading/trailing spaces in all lines
1336 int trim(int argc, char**) {
1337     if (argc > 1) {
1338         return no_args_expected();
1339     }
1340 
1341     string line;
1342     while (getline(cin, line)) {
1343         // no_cr(line);
1344         left_trim(line);
1345         right_trim(line);
1346 
1347         output_line(line);
1348         if (cout.eof()) {
1349             return 0;
1350         }
1351     }
1352 
1353     return 0;
1354 }
1355 
1356 // trim_end ignores trailing spaces in all lines
1357 int trim_end(int argc, char**) {
1358     if (argc > 1) {
1359         return no_args_expected();
1360     }
1361 
1362     string line;
1363     while (getline(cin, line)) {
1364         // no_cr(line);
1365         right_trim(line);
1366 
1367         output_line(line);
1368         if (cout.eof()) {
1369             return 0;
1370         }
1371     }
1372 
1373     return 0;
1374 }
1375 
1376 // unique avoids emitting the same input line more than once
1377 int unique(int argc, char**) {
1378     if (argc > 1) {
1379         return no_args_expected();
1380     }
1381 
1382     string line;
1383     set<string> seen;
1384 
1385     while (getline(cin, line)) {
1386         if (seen.find(line) != seen.end()) {
1387             continue;
1388         }
1389 
1390         output_line(line);
1391         if (cout.eof()) {
1392             return 0;
1393         }
1394         seen.insert(line);
1395     }
1396 
1397     return 0;
1398 }
1399 
// tool_aliases maps alternative tool names to the main names used as keys in
// table name2tool; func main checks this table first, before looking up the
// tool to run in name2tool, so every value here should also be a key there;
// all keys are lowercase and have no dashes/underscores in them
map<string, string> tool_aliases = {
    {"arguments", "args"},
    {"blowtabs", "blow"},
    {"breathe", "bl"},
    {"breatheheader", "bh"},
    {"breathelines", "bl"},
    {"butlast", "skiplast"},
    {"catl", "lines"},
    {"countbytes", "size"},
    {"dedup", "unique"},
    {"dropall", "drop"},
    {"erase", "drop"},
    {"eraseall", "drop"},
    {"expand", "blow"},
    {"expandtabs", "blow"},
    {"final", "last"},
    {"idropall", "idrop"},
    {"ierase", "idrop"},
    {"ieraseall", "idrop"},
    {"nil", "nothing"},
    {"null", "nothing"},
    {"pretsv", "begintsv"},
    {"rstrip", "trimend"},
    {"rtrim", "trimend"},
    {"skipfinal", "skiplast"},
    {"strip", "trim"},
    {"stripend", "trimend"},
    {"stripspace", "trim"},
    {"stripspaces", "trim"},
    {"striptrail", "trimend"},
    {"striptrails", "trimend"},
    {"trimspace", "trim"},
    {"trimspaces", "trim"},
    {"trimtrail", "trimend"},
    {"trimtrails", "trimend"},
    {"unixify", "lines"},
};
1437 
// name2tool maps each tool's main name to the func implementing it: func
// main uses this table to find which tool to run, calling it with the
// command-line arguments starting from the tool name itself; all keys are
// lowercase and have no dashes/underscores in them, even when the names of
// the funcs they lead to do
//
// NOTE(review): func teletype has no entry here, presumably because it's
// unfinished; confirm before adding it
map<string, int(*)(int, char**)> name2tool = {
    {"args", args},
    {"avoid", avoid},
    {"begin", begin},
    {"begintsv", begin_tsv},
    {"bh", bh},
    {"bl", bl},
    {"blow", blow},
    {"book", book},
    {"choplf", chop_lf},
    {"delay", delay},
    {"drop", drop},
    {"first", first},
    {"gbm", gbm},
    {"help", help},
    {"iavoid", iavoid},
    {"idrop", idrop},
    {"imatch", imatch},
    {"last", last},
    {"leak", leak},
    {"lines", lines},
    {"lineup", line_up},
    {"links", links},
    {"lower", lower},
    {"match", match},
    {"n", n},
    {"nn", nn},
    {"nothing", nothing},
    {"plain", plain},
    {"primes", primes},
    {"skip", skip},
    {"skiplast", skip_last},
    {"size", size},
    {"trim", trim},
    {"trimend", trim_end},
    {"unique", unique},
};
1475 
1476 int main(int argc, char** argv) {
1477     cin.tie(NULL);
1478     ios_base::sync_with_stdio(false);
1479 
1480     argc--;
1481     argv++;
1482 
1483     if (argc < 1) {
1484         help(argc, argv);
1485         return 0;
1486     }
1487 
1488     string key;
1489     string name = argv[0];
1490     key.append(name);
1491     key.erase(remove(key.begin(), key.end(), '-'), key.end());
1492     key.erase(remove(key.begin(), key.end(), '_'), key.end());
1493 
1494     if (tool_aliases.find(name) != tool_aliases.end()) {
1495         key = tool_aliases[name];
1496     }
1497     if (name2tool.find(key) == name2tool.end()) {
1498         stringstream msg;
1499         msg << name << ": no such tool";
1500         show_error(msg.str());
1501         return 1;
1502     }
1503 
1504     try {
1505         auto tool = name2tool[key];
1506         return tool(argc, argv);
1507     } catch (...) {
1508         show_error("generic error");
1509         return 1;
1510     }
1511 }