File: minibox.cpp
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Minibox is a busybox/toybox-like app which has several small tools in it. You
  27 simply run it with a specific tool to run as the first argument, without any
  28 dashes in front of it. Running minibox without arguments will show a help
  29 message, which in turn shows all the tools/names you can use.
  30 
  31 You can build the minibox app by running the command below, which should take
  32 several seconds to complete:
  33 
  34 g++ -Wall -s -O2 -o minibox minibox.cpp
  35 */
  36 
// default tab-stop width, used by tool `blow` when no valid width is given
const auto default_tabstop = 4;
// default number of lines between extra empty lines, used by tools `bh`/`bl`
const auto default_breathe_period = 5;

// ANSI escape sequences picking foreground colors from the 256-color palette
const char* gray_style = "\x1b[38;5;248m";
// const char* green_style = "\x1b[38;5;26m";
const char* green_style = "\x1b[38;5;29m";
const char* red_style = "\x1b[38;5;1m";

// ANSI escape sequence used to style error messages sent to stderr
// const char* error_style = "\x1b[41m\x1b[97m";
const char* error_style = "\x1b[31m";
  47 
  48 #include <algorithm>
  49 #include <chrono>
  50 #include <cmath>
  51 #include <cstdint>
  52 #include <cstring>
  53 #include <iostream>
  54 #include <map>
  55 #include <regex>
  56 #include <set>
  57 #include <sstream>
  58 #include <string>
  59 #include <thread>
  60 
  61 using namespace std;
  62 
  63 map<string, string> name2help = {
  64     {"args", "args [args...]\n\nShow all arguments given, one per output line"},
  65     {
  66         "avoid",
  67         "avoid [regexes...]\n\n"
  68         "Ignore all lines case-sensitively matching any of the JS-style regular\n"
  69         "expressions given",
  70     },
  71     {"begin", "begin [args...]\n\nPrecede input lines with the args/lines given"},
  72     {"begintsv", "begintsv [items...]\n\nPrecede input lines with a single TSV line"},
  73     {"bh", "bh [every = 5]\n\nAdd an extra emtpy line after the 1st, then every few"},
  74     {"bl", "bl [every = 5]\n\nAdd an extra emtpy line every few"},
  75     {"blow", "blow [tabstop = 4]\n\nBlow/expand tabs into runs of spaces"},
  76     {"book", "book [page height]\n\nLay out lines into 2 columns, just like a book"},
  77     {"choplf", "choplf\n\nIgnore the last line-feed from the input, if present"},
  78     {"delay", "delay [seconds = 1.0]\n\nWait some time before emitting each line"},
  79     {
  80         "drop",
  81         "drop [what...]\n\n"
  82         "Ignore all occurrences of all the regular expressions given, in the\n"
  83         "order given",
  84     },
  85     {"first", "first [lines = 1]\n\nLimit output up to the first few input lines"},
  86     {"gbm", "gbm [good...] [bad...] [meh...]\n\nANSI-style lines by regex-matches"},
  87     {"help", "help\n\nShow a help message about this app"},
  88     {
  89         "iavoid",
  90         "iavoid [regexes...]\n\n"
  91         "Ignore all lines case-insensitively matching any of the JS-style regular\n"
  92         "expressions given",
  93     },
  94     {
  95         "idrop",
  96         "idrop [what...]\n\n"
  97         "Ignore all case-insensitively-matched occurrences of all the regular\n"
  98         "expressions given, in the order given",
  99     },
 100     {
 101         "imatch",
 102         "imatch [regexes...]\n\n"
 103         "Only keep lines case-insensitively matching any of the JS-style regular\n"
 104         "expressions given",
 105     },
 106     {"last", "last [lines = 1]\n\nLimit output up to the last few input lines"},
 107     {"leak", "leak\n\nHelp debug pipes, by copying input lines to stderr and stdout"},
 108     {
 109         "lines",
 110         "lines\n\n"
 111         "Ignore trailing carriage returns, making sure the last line ends\n"
 112         "with a line-feed, whether the input ended with one or not",
 113     },
 114     {
 115         "lineup",
 116         "lineup [max = 0]\n\n"
 117         "Join lines into TSV lines up to the number of items given; when\n"
 118         "not given a max value, or when given 0 or a negative value, join\n"
 119         "all input lines into a single TSV output line",
 120     },
 121     {"links", "links\n\nFind all hyperlinks-like substrings from the input"},
 122     {"lower", "lower\n\nASCII-lowercase all input lines"},
 123     {
 124         "match",
 125         "match [regexes...]\n\n"
 126         "Only keep lines case-sensitively matching any of the JS-style regular\n"
 127         "expressions given",
 128     },
 129     {"n", "n [start = 1]\n\nNumber all input lines"},
 130     {"nn", "nn\n\nNice Numbers makes long numbers easier to read via ANSI-styles"},
 131     {"nothing", "nothing\n\nRead/write nothing, effectively doing nothing"},
 132     {"plain", "plain\n\nIgnore all ANSI-style sequences"},
 133     {"primes", "primes [count]\n\nShow the first few prime numbers"},
 134     {"size", "size\n\nCount input bytes"},
 135     {"skip", "skip [lines = 1]\n\nIgnore the first few input lines"},
 136     {"skiplast", "skiplast [lines = 1]\n\nIgnore the last few input lines"},
 137     {"trim", "trim\n\nIgnore leading/trailing spaces on all lines"},
 138     {"trimend", "trimend\n\nIgnore trailing spaces on all lines"},
 139     {"unique", "unique\n\nAvoid emitting the same input line more than once"},
 140 };
 141 
 142 int make_regexes(int argc, char** argv, vector<regex>& res, bool case_sens) {
 143     size_t errors = 0;
 144     const auto dialect = regex_constants::ECMAScript;
 145     const auto options = dialect | (case_sens ? 0 : regex_constants::icase);
 146 
 147     for (int i = 1; i < argc; i++) {
 148         try {
 149             regex expr(argv[i], options);
 150             res.push_back(expr);
 151         } catch (regex_error& e) {
 152             const auto es = error_style;
 153             cerr << es << e.code() << ": " << e.what() << "\x1b[0m" << endl;
 154             errors++;
 155         }
 156     }
 157 
 158     return errors;
 159 }
 160 
 161 uint64_t count_runes(const string& s) {
 162     uint64_t n = 0;
 163     for (auto c : s) {
 164         n += ((c & 0xc0) != 0x80);
 165     }
 166     return n;
 167 }
 168 
 169 void de_bom(string &s) {
 170     // s.starts_with("\xef\xbb\xbf")
 171     if (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') {
 172         s.erase(0, 3);
 173     }
 174 }
 175 
 176 void no_cr(string &s) {
 177     s.erase(remove(s.begin(), s.end(), '\r'), s.end());
 178 }
 179 
 180 istream& get_unix_line(istream& src, string& dest) {
 181     getline(src, dest);
 182     no_cr(dest);
 183     return src;
 184 }
 185 
 186 void left_trim(string &s) {
 187     auto trimmed = find_if(s.begin(), s.end(), [](char c) {
 188         return !isspace<char>(c, locale::classic());
 189     });
 190     s.erase(s.begin(), trimmed);
 191 }
 192 
 193 bool matches_any(string& s, const vector<regex>& expressions) {
 194     for (regex e : expressions) {
 195         if (regex_search(s, e)) {
 196             return true;
 197         }
 198     }
 199     return false;
 200 }
 201 
 202 void remove_all(string& dest, char* ignore) {
 203     auto len = strlen(ignore);
 204     auto pos = string::npos;
 205     while (1) {
 206         pos = dest.find(ignore);
 207         if (pos == string::npos) {
 208             return;
 209         }
 210         dest.erase(pos, len);
 211     }
 212 }
 213 
 214 void right_trim(string &s) {
 215     auto trimmed = find_if(s.rbegin(), s.rend(), [](char c) {
 216         return !isspace<char>(c, locale::classic());
 217     });
 218     s.erase(trimmed.base(), s.end());
 219 }
 220 
 221 void show_error(const string& msg) {
 222     cerr << error_style << msg << "\x1b[0m" << endl;
 223 }
 224 
 225 inline int no_args_expected() {
 226     show_error("no args expected");
 227     return 1;
 228 }
 229 
 230 /*
 231 The next few funcs allow changing the auto-flushing/line-buffering stdout
 232 behavior across tools.
 233 */
 234 
 235 inline void output_line(const char* s) {
 236     cout << s << endl;
 237 }
 238 
 239 inline void output_line(uint64_t n) {
 240     cout << n << endl;
 241 }
 242 
 243 inline void output_line(const string& s) {
 244     cout << s << endl;
 245 }
 246 
 247 inline void output_styled_line(const string& style, const string &line) {
 248     cout << style << line << "\x1b[0m" << endl;
 249 }
 250 
 251 inline void end_output_line() {
 252     cout << endl;
 253 }
 254 
 255 // args emits each string given to it on its own output line, ignoring any
 256 // input
 257 int args(int argc, char** argv) {
 258     for (int i = 1; i < argc; i++) {
 259         output_line(argv[i]);
 260     }
 261     return 0;
 262 }
 263 
 264 int avoid(int argc, char** argv) {
 265     vector<regex> expressions;
 266     size_t errors = make_regexes(argc, argv, expressions, true);
 267     if (errors > 0) {
 268         return 1;
 269     }
 270 
 271     string line;
 272     while (getline(cin, line)) {
 273         if (!matches_any(line, expressions)) {
 274             output_line(line);
 275             if (cout.eof()) {
 276                 return 0;
 277             }
 278         }
 279     }
 280     return 0;
 281 }
 282 
 283 // begin emits the few strings given as their own lines, before emitting back
 284 // all input lines
 285 int begin(int argc, char** argv) {
 286     for (int i = 1; i < argc; i++) {
 287         output_line(argv[i]);
 288     }
 289 
 290     string line;
 291     while (getline(cin, line)) {
 292         output_line(line);
 293         if (cout.eof()) {
 294             return 0;
 295         }
 296     }
 297     return 0;
 298 }
 299 
 300 // begin_tsv emits a line of tab-separated values (TSV), before emitting back
 301 // all input lines
 302 int begin_tsv(int argc, char** argv) {
 303     for (int i = 1; i < argc; i++) {
 304         if (i > 1) {
 305             cout << '\t';
 306         }
 307         cout << argv[i];
 308     }
 309     if (argc > 1) {
 310         end_output_line();
 311     }
 312 
 313     string line;
 314     while (getline(cin, line)) {
 315         output_line(line);
 316         if (cout.eof()) {
 317             return 0;
 318         }
 319     }
 320     return 0;
 321 }
 322 
 323 // bh (breathe header) adds an extra empty line after the first/header line,
 324 // and then adds a single extra empty line every few
 325 int bh(int argc, char** argv) {
 326     string line;
 327     int64_t every = default_breathe_period;
 328 
 329     if (argc > 1) {
 330         try {
 331             every = stol(argv[1]);
 332         } catch (...) { }
 333     }
 334 
 335     if (!getline(cin, line)) {
 336         return 0;
 337     }
 338     output_line(line);
 339 
 340     auto i = 0;
 341     while (getline(cin, line)) {
 342         if (every > 0 && i % every == 0) {
 343             end_output_line();
 344         }
 345 
 346         output_line(line);
 347         if (cout.eof()) {
 348             return 0;
 349         }
 350 
 351         i++;
 352     }
 353 
 354     return 0;
 355 }
 356 
 357 
 358 // bl (breathe lines) adds single extra empty lines every few
 359 int bl(int argc, char** argv) {
 360     string line;
 361     int64_t every = default_breathe_period;
 362 
 363     if (argc > 1) {
 364         try {
 365             every = stol(argv[1]);
 366         } catch (...) { }
 367     }
 368 
 369     auto i = 0;
 370     while (getline(cin, line)) {
 371         if (every > 0 && i % every == 0 && i > 0) {
 372             end_output_line();
 373         }
 374 
 375         output_line(line);
 376         if (cout.eof()) {
 377             return 0;
 378         }
 379 
 380         i++;
 381     }
 382 
 383     return 0;
 384 }
 385 
 386 // blow blows/expands tabs into runs of spaces
 387 int blow(int argc, char** argv) {
 388     string line;
 389     int64_t tabstop = default_tabstop;
 390 
 391     if (argc > 1) {
 392         try {
 393             tabstop = stol(argv[1]);
 394         } catch (...) { }
 395     }
 396 
 397     if (tabstop < 1) {
 398         while (getline(cin, line)) {
 399             output_line(line);
 400             if (cout.eof()) {
 401                 return 0;
 402             }
 403         }
 404         return 0;
 405     }
 406 
 407     while (getline(cin, line)) {
 408         int64_t i = 0;
 409 
 410         for (auto c : line) {
 411             if (c != '\t') {
 412                 i += ((c & 0xc0) != 0x80);
 413                 cout << c;
 414                 continue;
 415             }
 416 
 417             auto spaces = tabstop - i % tabstop;
 418             i += spaces;
 419             for (auto j = 0; j < spaces; j++) {
 420                 cout << ' ';
 421             }
 422         }
 423 
 424         end_output_line();
 425     }
 426 
 427     return 0;
 428 }
 429 
 430 // book lays out input lines on 2 columns, the same way books do it
 431 int book(int argc, char** argv) {
 432     if (argc != 2) {
 433         show_error("expected 1 argument");
 434         return 1;
 435     }
 436 
 437     uint64_t page_height = 0;
 438     try {
 439         page_height = stol(argv[1]);
 440     } catch (...) {
 441         show_error("invalid page-height");
 442         return 1;
 443     }
 444 
 445     if (page_height < 2) {
 446         show_error("page height can't be less than 2");
 447         return 1;
 448     }
 449 
 450     string line;
 451     vector<string> lines;
 452     while (getline(cin, line)) {
 453         lines.push_back(line);
 454     }
 455 
 456     auto nlines = lines.size();
 457     auto inner = page_height - 1;
 458 
 459     auto side = 0;
 460     uint64_t widths[2] = {0, 0};
 461 
 462     for (uint64_t i = 0; i < nlines; i += inner) {
 463         for (uint64_t j = i; j < nlines && j < i + inner; j++) {
 464             auto w = count_runes(lines[j]);
 465             if (widths[side] < w) {
 466                 widths[side] = w;
 467             }
 468         }
 469 
 470         side = 1 - side;
 471     }
 472 
 473     for (uint64_t i = 0; i < nlines; i += 2*inner) {
 474         if (i > 0) {
 475             auto rest = (widths[1] > 0) ? (3 + widths[1]) : 2;
 476             auto maxw = widths[0] + rest;
 477             for (uint64_t j = 0; j < maxw; j++) {
 478                 // cout << '-';
 479                 cout << "·";
 480             }
 481             end_output_line();
 482         }
 483 
 484         for (auto j = i; j < nlines && j < i + inner; j++) {
 485             auto l = lines[j];
 486             auto w = count_runes(l);
 487             auto r = (j + inner < nlines) ? lines[j + inner] : string();
 488 
 489             cout << l;
 490             if (widths[0] > w) {
 491                 for (uint64_t j = 0; j < widths[0] - w; j++) {
 492                     cout << ' ';
 493                 }
 494             }
 495 
 496             w = r.size();
 497             if (w == 0) {
 498                 // cout << " |";
 499                 cout << "";
 500             } else {
 501                 // cout << " | ";
 502                 cout << "";
 503             }
 504 
 505             output_line(r);
 506             if (cout.eof()) {
 507                 return 0;
 508             }
 509         }
 510     }
 511 
 512     return 0;
 513 }
 514 
 515 // chop_lf ignore the last line-feed from the input, if present
 516 int chop_lf(int argc, char**) {
 517     if (argc > 1) {
 518         return no_args_expected();
 519     }
 520 
 521     auto i = 0;
 522     string line;
 523     while (getline(cin, line)) {
 524         if (i > 0) {
 525             end_output_line();
 526         }
 527         cout << line;
 528         i++;
 529     }
 530 
 531     return 0;
 532 }
 533 
 534 // delay waits the given number of seconds before emitting back each line
 535 // from the input
 536 int delay(int argc, char** argv) {
 537     double seconds = 1.0;
 538 
 539     if (argc > 1) {
 540         try {
 541             seconds = stof(argv[1]);
 542         } catch (...) { }
 543     }
 544 
 545     if (isnan(seconds) || isinf(seconds)) {
 546         show_error("invalid number");
 547         return 1;
 548     }
 549 
 550     if (seconds < 0) {
 551         seconds = 0;
 552     }
 553 
 554     string line;
 555     auto ms = (int)(1000 * seconds);
 556 
 557     while (getline(cin, line)) {
 558         this_thread::sleep_for(chrono::milliseconds(ms));
 559 
 560         output_line(line);
 561         if (cout.eof()) {
 562             return 0;
 563         }
 564     }
 565 
 566     return 0;
 567 }
 568 
 569 // drop ignores all occurrences of all the regular expressions given, in the
 570 // order given; regex matches are case-sensitive
 571 int drop(int argc, char** argv) {
 572     vector<regex> expressions;
 573     size_t errors = make_regexes(argc, argv, expressions, true);
 574     if (errors > 0) {
 575         return 1;
 576     }
 577 
 578     string line;
 579     while (getline(cin, line)) {
 580         for (regex e : expressions) {
 581             if (!regex_search(line, e)) {
 582                 continue;
 583             }
 584 
 585             string s = regex_replace(line, e, "");
 586             line.clear();
 587             line.append(s);
 588         }
 589 
 590         output_line(line);
 591         if (cout.eof()) {
 592             return 0;
 593         }
 594     }
 595 
 596     return 0;
 597 }
 598 
 599 // first limits input up to its first few lines
 600 int first(int argc, char** argv) {
 601     string line;
 602     int64_t max = 1;
 603 
 604     if (argc > 1) {
 605         try {
 606             max = stol(argv[1]);
 607         } catch (...) { }
 608     }
 609 
 610     if (max < 1) {
 611         return 0;
 612     }
 613 
 614     auto i = 0;
 615     while (getline(cin, line)) {
 616         output_line(line);
 617         if (cout.eof()) {
 618             return 0;
 619         }
 620         i++;
 621 
 622         if (i >= max) {
 623             return 0;
 624         }
 625     }
 626 
 627     return 0;
 628 }
 629 
 630 // gbm (good, bad, meh) ANSI-styles/colors lines by regex-matching
 631 int gbm(int argc, char** argv) {
 632     string line;
 633     regex good, bad, meh;
 634 
 635     const string good_style = green_style;
 636     const string bad_style = red_style;
 637     const string meh_style = gray_style;
 638 
 639     switch (argc) {
 640     case 2:
 641         good = regex(argv[1]);
 642 
 643         while (getline(cin, line)) {
 644             if (cout.eof()) {
 645                 return 0;
 646             }
 647 
 648             if (regex_search(line, good)) {
 649                 output_styled_line(good_style, line);
 650                 continue;
 651             }
 652 
 653             output_line(line);
 654         }
 655 
 656         return 0;
 657 
 658     case 3:
 659         good = regex(argv[1]);
 660         bad = regex(argv[2]);
 661 
 662         while (getline(cin, line)) {
 663             if (cout.eof()) {
 664                 return 0;
 665             }
 666 
 667             if (regex_search(line, good)) {
 668                 output_styled_line(good_style, line);
 669                 continue;
 670             }
 671             if (regex_search(line, bad)) {
 672                 output_styled_line(bad_style, line);
 673                 continue;
 674             }
 675 
 676             output_line(line);
 677         }
 678 
 679         return 0;
 680 
 681     case 4:
 682         good = regex(argv[1]);
 683         bad = regex(argv[2]);
 684         meh = regex(argv[3]);
 685 
 686         while (getline(cin, line)) {
 687             if (cout.eof()) {
 688                 return 0;
 689             }
 690 
 691             if (regex_search(line, good)) {
 692                 output_styled_line(good_style, line);
 693                 continue;
 694             }
 695             if (regex_search(line, bad)) {
 696                 output_styled_line(bad_style, line);
 697                 continue;
 698             }
 699             if (regex_search(line, meh)) {
 700                 output_styled_line(meh_style, line);
 701                 continue;
 702             }
 703 
 704             output_line(line);
 705         }
 706 
 707         return 0;
 708 
 709     default:
 710         show_error("only up to 3 args are supported");
 711         return 1;
 712     }
 713 }
 714 
 715 extern map<string, string> tool_aliases;
 716 extern map<string, int(*)(int, char**)> name2tool;
 717 
 718 // help_general handles the no-arguments case for func help
 719 int help_general(int, char**) {
 720     output_line("minibox [tool] [args...]");
 721     end_output_line();
 722     output_line("This is a busybox/toybox-like app which has several small");
 723     output_line("tools in it.");
 724 
 725     vector<string> keys;
 726     for (auto kv : tool_aliases) {
 727         keys.push_back(kv.first);
 728     }
 729     sort(keys.begin(), keys.end());
 730 
 731     end_output_line();
 732     end_output_line();
 733     output_line("Aliases");
 734     end_output_line();
 735 
 736     for (auto k : keys) {
 737         cout << k << '\t' << tool_aliases[k] << endl;
 738     }
 739 
 740     keys.clear();
 741     for (auto kv : name2tool) {
 742         keys.push_back(kv.first);
 743     }
 744     sort(keys.begin(), keys.end());
 745 
 746     end_output_line();
 747     end_output_line();
 748     output_line("Tools");
 749 
 750     for (auto k : keys) {
 751         end_output_line();
 752         end_output_line();
 753         output_line(name2help[k]);
 754         if (cout.eof()) {
 755             return 0;
 756         }
 757     }
 758 
 759     return 0;
 760 }
 761 
 762 int help(int argc, char** argv) {
 763     if (argc <= 1) {
 764         return help_general(argc, argv);
 765     }
 766 
 767     auto nerr = 0;
 768 
 769     for (auto i = 1; i < argc; i++) {
 770         auto name = argv[i];
 771         if (name2help.find(name) == name2help.end()) {
 772             stringstream msg;
 773             msg << name << ": no help message found";
 774             show_error(msg.str());
 775             nerr++;
 776             continue;
 777         }
 778 
 779         if (i > 1) {
 780             end_output_line();
 781             end_output_line();
 782         }
 783         output_line(name2help[name]);
 784     }
 785 
 786     if (nerr > 0) {
 787         return 1;
 788     }
 789     return 0;
 790 }
 791 
 792 int iavoid(int argc, char** argv) {
 793     vector<regex> expressions;
 794     size_t errors = make_regexes(argc, argv, expressions, false);
 795     if (errors > 0) {
 796         return 1;
 797     }
 798 
 799     string line;
 800     while (getline(cin, line)) {
 801         if (!matches_any(line, expressions)) {
 802             output_line(line);
 803             if (cout.eof()) {
 804                 return 0;
 805             }
 806         }
 807     }
 808     return 0;
 809 }
 810 
 811 // idrop ignores all case-insensitively-matched occurrences of all the regular
 812 // expressions given, in the order given
 813 int idrop(int argc, char** argv) {
 814     vector<regex> expressions;
 815     size_t errors = make_regexes(argc, argv, expressions, false);
 816     if (errors > 0) {
 817         return 1;
 818     }
 819 
 820     string line;
 821     while (getline(cin, line)) {
 822         for (regex e : expressions) {
 823             if (!regex_search(line, e)) {
 824                 continue;
 825             }
 826 
 827             string s = regex_replace(line, e, "");
 828             line.clear();
 829             line.append(s);
 830         }
 831 
 832         output_line(line);
 833         if (cout.eof()) {
 834             return 0;
 835         }
 836     }
 837 
 838     return 0;
 839 }
 840 
 841 int imatch(int argc, char** argv) {
 842     if (argc == 1) {
 843         return 0;
 844     }
 845 
 846     vector<regex> expressions;
 847     size_t errors = make_regexes(argc, argv, expressions, false);
 848     if (errors > 0) {
 849         return 1;
 850     }
 851 
 852     string line;
 853     while (getline(cin, line)) {
 854         if (matches_any(line, expressions)) {
 855             output_line(line);
 856             if (cout.eof()) {
 857                 return 0;
 858             }
 859         }
 860     }
 861     return 0;
 862 }
 863 
 864 // last limits input up to its last few lines
 865 int last(int argc, char** argv) {
 866     string line;
 867     vector<string> latest;
 868     int64_t max = 1;
 869 
 870     if (argc > 1) {
 871         try {
 872             max = stol(argv[1]);
 873         } catch (...) { }
 874     }
 875 
 876     if (max < 1) {
 877         return 0;
 878     }
 879 
 880     size_t i = 0;
 881     while (getline(cin, line)) {
 882         if ((int64_t)latest.size() < max) {
 883             latest.push_back(line);
 884         } else {
 885             latest[i] = line;
 886         }
 887         i = (i + 1) % max;
 888     }
 889 
 890     for (size_t j = i; j < latest.size(); j++) {
 891         output_line(latest[j]);
 892         if (cout.eof()) {
 893             return 0;
 894         }
 895     }
 896     for (size_t j = 0; j < i; j++) {
 897         output_line(latest[j]);
 898         if (cout.eof()) {
 899             return 0;
 900         }
 901     }
 902     return 0;
 903 }
 904 
 905 // leak helps debug pipes, by copying all input lines both to stderr and to
 906 // stdout
 907 int leak(int argc, char**) {
 908     if (argc > 1) {
 909         return no_args_expected();
 910     }
 911 
 912     string line;
 913     while (getline(cin, line)) {
 914         cerr << line << endl;
 915         output_line(line);
 916         if (cout.eof()) {
 917             return 0;
 918         }
 919     }
 920     return 0;
 921 }
 922 
 923 // lines ignores trailing carriage-returns from input lines, and ensures the
 924 // last line ends with a line-feed, whether the input had that or not
 925 int lines(int argc, char**) {
 926     if (argc > 1) {
 927         return no_args_expected();
 928     }
 929 
 930     string line;
 931     if (!get_unix_line(cin, line)) {
 932         return 0;
 933     }
 934     de_bom(line);
 935     output_line(line);
 936 
 937     while (get_unix_line(cin, line)) {
 938         output_line(line);
 939         if (cout.eof()) {
 940             return 0;
 941         }
 942     }
 943     return 0;
 944 }
 945 
 946 // line_up joins input lines via tabs, up to the number given: whenever that
 947 // number is exceeded, a new output line starts; when not given a number, or
 948 // when that number is 0 or negative, all input lines are tab-joined into a
 949 // single output line
 950 int line_up(int argc, char** argv) {
 951     string line;
 952     int64_t max = 0;
 953 
 954     if (argc > 1) {
 955         try {
 956             max = stol(argv[1]);
 957         } catch (...) { }
 958     }
 959 
 960     auto i = 0;
 961     while (getline(cin, line)) {
 962         if (i >= max && max > 0) {
 963             i = 0;
 964             end_output_line();
 965         } else if (i > 0) {
 966             cout << '\t';
 967             if (cout.eof()) {
 968                 return 0;
 969             }
 970         }
 971 
 972         cout << line;
 973         i++;
 974     }
 975 
 976     if (i > 0) {
 977         end_output_line();
 978     }
 979     return 0;
 980 }
 981 
 982 // links gets all hyperlink-type substrings from the input, each match shown
 983 // on its own output line
 984 int links(int argc, char**) {
 985     if (argc > 1) {
 986         return no_args_expected();
 987     }
 988 
 989     string line;
 990     auto pat = regex("https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*");
 991 
 992     while (getline(cin, line)) {
 993         auto iter = sregex_iterator(line.begin(), line.end(), pat);
 994         for (; iter != sregex_iterator(); iter++) {
 995             output_line(iter->str());
 996             if (cout.eof()) {
 997                 return 0;
 998             }
 999         }
1000     }
1001 
1002     return 0;
1003 }
1004 
1005 // lower ASCII-lowercases all symbols in all lines
1006 int lower(int argc, char**) {
1007     if (argc > 1) {
1008         return no_args_expected();
1009     }
1010 
1011     string line;
1012     while (getline(cin, line)) {
1013         transform(line.begin(), line.end(), line.begin(), [](char c) {
1014             return tolower(c);
1015         });
1016 
1017         output_line(line);
1018         if (cout.eof()) {
1019             return 0;
1020         }
1021     }
1022 
1023     return 0;
1024 }
1025 
1026 int match(int argc, char** argv) {
1027     if (argc == 1) {
1028         return 0;
1029     }
1030 
1031     vector<regex> expressions;
1032     size_t errors = make_regexes(argc, argv, expressions, true);
1033     if (errors > 0) {
1034         return 1;
1035     }
1036 
1037     string line;
1038     while (getline(cin, line)) {
1039         if (matches_any(line, expressions)) {
1040             output_line(line);
1041             if (cout.eof()) {
1042                 return 0;
1043             }
1044         }
1045     }
1046     return 0;
1047 }
1048 
// n numbers lines using the optional starting counter given, which is 1 by
// default, and also when the argument isn't a valid number; each output line
// starts with the current counter, followed by a tab, ending with the
// original input line
int n(int argc, char** argv) {
    string line;
    // the counter's name avoids hiding the name of this func
    int64_t counter = 1;

    if (argc > 1) {
        try {
            counter = stol(argv[1]);
        } catch (...) {
            // invalid numbers deliberately fall back to the default
        }
    }

    while (getline(cin, line)) {
        cout << counter << '\t' << line << endl;
        // quit early when stdout can't take any more output, the same way
        // the other line-emitting tools do
        if (!cout) {
            return 0;
        }
        counter++;
    }

    return 0;
}
1069 
1070 // style_digits helps func nn do its job
1071 void style_digits(string digits) {
1072     auto l = digits.length();
1073     if (l < 4) {
1074         cout << digits;
1075         return;
1076     }
1077 
1078     auto alt = false;
1079     auto n = 3 - (l % 3);
1080 
1081     for (auto c : digits) {
1082         cout << c;
1083         n++;
1084         if (n % 3 == 0) {
1085             alt = !alt;
1086             cout << (alt ? gray_style : "\x1b[0m");
1087         }
1088     }
1089 
1090     if (alt) {
1091         cout << "\x1b[0m";
1092     }
1093 }
1094 
1095 // nn stands for `nice numbers`, alternating ANSI-styles for long-enough runs
1096 // of digits, the result being easier to read/parse visually, especially on
1097 // output full of such long numbers, such as with tables
1098 int nn(int argc, char**) {
1099     if (argc > 1) {
1100         return no_args_expected();
1101     }
1102 
1103     string line;
1104     stringstream digits;
1105     auto in_digits = false;
1106 
1107     while (getline(cin, line)) {
1108         for (auto c : line) {
1109             auto is_digit = '0' <= c && c <= '9';
1110 
1111             if (in_digits) {
1112                 if (!is_digit) {
1113                     in_digits = false;
1114                     style_digits(digits.str());
1115                     digits.str(string());
1116                     cout << c;
1117                 } else{
1118                     digits << c;
1119                 }
1120                 continue;
1121             }
1122 
1123             if (is_digit) {
1124                 in_digits = true;
1125                 digits << c;
1126                 continue;
1127             }
1128 
1129             cout << c;
1130         }
1131 
1132         if (in_digits) {
1133             in_digits = false;
1134             style_digits(digits.str());
1135             digits.str(string());
1136         }
1137 
1138         end_output_line();
1139         if (cout.eof()) {
1140             return 0;
1141         }
1142     }
1143 
1144     return 0;
1145 }
1146 
1147 // nothing reads nothing and writes, effectively doing nothing
1148 int nothing(int argc, char**) {
1149     if (argc > 1) {
1150         return no_args_expected();
1151     }
1152     return 0;
1153 }
1154 
1155 // plain ignores all ANSI-style sequences, leaving actual `plain` plain-text
1156 int plain(int argc, char**) {
1157     if (argc > 1) {
1158         return no_args_expected();
1159     }
1160 
1161     string line;
1162 
1163     while (getline(cin, line)) {
1164         int state = 0;
1165         for (auto c : line) {
1166             switch (state) {
1167                 case 0:
1168                     if (c == '\x1b') {
1169                         state = 1;
1170                     } else {
1171                         cout << c;
1172                     }
1173                     break;
1174 
1175                 case 1:
1176                     if (c == '[') {
1177                         state = 2;
1178                     } else {
1179                         cout << '\x1b';
1180                         cout << c;
1181                         state = 1;
1182                     }
1183                     break;
1184 
1185                 case 2:
1186                     if ('A' <= c && c <= 'Z') {
1187                         state = 0;
1188                     } else if ('a' <= c && c <= 'z') {
1189                         state = 0;
1190                     }
1191                     break;
1192             }
1193         }
1194 
1195         end_output_line();
1196         if (cout.eof()) {
1197             return 0;
1198         }
1199     }
1200 
1201     return 0;
1202 }
1203 
1204 // primes shows the first few prime numbers, one per output line
1205 int primes(int argc, char** argv) {
1206     uint64_t count = 1000000;
1207 
1208     if (argc > 1) {
1209         try {
1210             count = stol(argv[1]);
1211         } catch (...) { }
1212     }
1213 
1214     for (uint64_t n = 3; count > 0; n += 2) {
1215         uint64_t max = sqrt((double)n);
1216         for (uint64_t div = 3; div <= max; div += 2) {
1217             if (n % div == 0) {
1218                 goto skip;
1219             }
1220         }
1221 
1222         output_line(n);
1223         if (cout.eof()) {
1224             return 0;
1225         }
1226         count--;
1227 
1228         skip:;
1229     }
1230 
1231     return 0;
1232 }
1233 
1234 // skip ignores up to the given number of input lines, then emitting the rest
1235 int skip(int argc, char** argv) {
1236     string line;
1237     int64_t skip = 1;
1238 
1239     if (argc > 1) {
1240         try {
1241             skip = stol(argv[1]);
1242         } catch (...) { }
1243     }
1244 
1245     auto i = 0;
1246     while (getline(cin, line)) {
1247         if (i < skip) {
1248             i++;
1249             continue;
1250         }
1251 
1252         output_line(line);
1253         if (cout.eof()) {
1254             return 0;
1255         }
1256         i++;
1257     }
1258 
1259     return 0;
1260 }
1261 
1262 // skip_last emits all but the last few input lines
1263 int skip_last(int argc, char** argv) {
1264     string line;
1265     vector<string> latest;
1266     int64_t max = 1;
1267 
1268     if (argc > 1) {
1269         try {
1270             max = stol(argv[1]);
1271         } catch (...) { }
1272     }
1273 
1274     if (max < 1) {
1275         while (getline(cin, line)) {
1276             output_line(line);
1277             if (cout.eof()) {
1278                 return 0;
1279             }
1280         }
1281         return 0;
1282     }
1283 
1284     size_t i = 0;
1285     while (getline(cin, line)) {
1286         if ((int64_t)latest.size() < max) {
1287             latest.push_back(line);
1288         } else {
1289             output_line(latest[i]);
1290             if (cout.eof()) {
1291                 return 0;
1292             }
1293             latest[i] = line;
1294         }
1295 
1296         i = (i + 1) % max;
1297     }
1298 
1299     return 0;
1300 }
1301 
1302 int size(int argc, char**) {
1303     if (argc > 1) {
1304         return no_args_expected();
1305     }
1306 
1307     uint64_t n = 0;
1308     freopen(0, "rb", stdin);
1309     while (getchar_unlocked() != EOF) {
1310         n++;
1311     }
1312     output_line(n);
1313     return 0;
1314 }
1315 
1316 // teletype simulates the cadence of old teletype machines
1317 int teletype(int argc, char**) {
1318     if (argc > 1) {
1319         return no_args_expected();
1320     }
1321 
1322     string line;
1323     while (getline(cin, line)) {
1324         auto l = line.size();
1325         if (l == 0) {
1326             end_output_line();
1327             continue;
1328         }
1329 
1330         end_output_line();
1331     }
1332 
1333     return 0;
1334 }
1335 
1336 // trim ignores leading/trailing spaces in all lines
1337 int trim(int argc, char**) {
1338     if (argc > 1) {
1339         return no_args_expected();
1340     }
1341 
1342     string line;
1343     while (getline(cin, line)) {
1344         // no_cr(line);
1345         left_trim(line);
1346         right_trim(line);
1347 
1348         output_line(line);
1349         if (cout.eof()) {
1350             return 0;
1351         }
1352     }
1353 
1354     return 0;
1355 }
1356 
1357 // trim_end ignores trailing spaces in all lines
1358 int trim_end(int argc, char**) {
1359     if (argc > 1) {
1360         return no_args_expected();
1361     }
1362 
1363     string line;
1364     while (getline(cin, line)) {
1365         // no_cr(line);
1366         right_trim(line);
1367 
1368         output_line(line);
1369         if (cout.eof()) {
1370             return 0;
1371         }
1372     }
1373 
1374     return 0;
1375 }
1376 
1377 // unique avoids emitting the same input line more than once
1378 int unique(int argc, char**) {
1379     if (argc > 1) {
1380         return no_args_expected();
1381     }
1382 
1383     string line;
1384     set<string> seen;
1385 
1386     while (getline(cin, line)) {
1387         if (seen.find(line) != seen.end()) {
1388             continue;
1389         }
1390 
1391         output_line(line);
1392         if (cout.eof()) {
1393             return 0;
1394         }
1395         seen.insert(line);
1396     }
1397 
1398     return 0;
1399 }
1400 
// tool_aliases turns alternative tool names into the names used as keys in
// the name2tool table; entries are grouped by the tool they stand for
map<string, string> tool_aliases{
    // args
    {"arguments", "args"},

    // begintsv
    {"pretsv", "begintsv"},

    // bh
    {"breatheheader", "bh"},

    // bl
    {"breathe", "bl"},
    {"breathelines", "bl"},

    // blow
    {"blowtabs", "blow"},
    {"expand", "blow"},
    {"expandtabs", "blow"},

    // drop
    {"dropall", "drop"},
    {"erase", "drop"},
    {"eraseall", "drop"},

    // idrop
    {"idropall", "idrop"},
    {"ierase", "idrop"},
    {"ieraseall", "idrop"},

    // last
    {"final", "last"},

    // lines
    {"catl", "lines"},
    {"unixify", "lines"},

    // nothing
    {"nil", "nothing"},
    {"null", "nothing"},

    // size
    {"countbytes", "size"},

    // skiplast
    {"butlast", "skiplast"},
    {"skipfinal", "skiplast"},

    // trim
    {"strip", "trim"},
    {"stripspace", "trim"},
    {"stripspaces", "trim"},
    {"trimspace", "trim"},
    {"trimspaces", "trim"},

    // trimend
    {"rstrip", "trimend"},
    {"rtrim", "trimend"},
    {"stripend", "trimend"},
    {"striptrail", "trimend"},
    {"striptrails", "trimend"},
    {"trimtrail", "trimend"},
    {"trimtrails", "trimend"},

    // unique
    {"dedup", "unique"},
};
1438 
1439 map<string, int(*)(int, char**)> name2tool = {
1440     {"args", args},
1441     {"avoid", avoid},
1442     {"begin", begin},
1443     {"begintsv", begin_tsv},
1444     {"bh", bh},
1445     {"bl", bl},
1446     {"blow", blow},
1447     {"book", book},
1448     {"choplf", chop_lf},
1449     {"delay", delay},
1450     {"drop", drop},
1451     {"first", first},
1452     {"gbm", gbm},
1453     {"help", help},
1454     {"iavoid", iavoid},
1455     {"idrop", idrop},
1456     {"imatch", imatch},
1457     {"last", last},
1458     {"leak", leak},
1459     {"lines", lines},
1460     {"lineup", line_up},
1461     {"links", links},
1462     {"lower", lower},
1463     {"match", match},
1464     {"n", n},
1465     {"nn", nn},
1466     {"nothing", nothing},
1467     {"plain", plain},
1468     {"primes", primes},
1469     {"skip", skip},
1470     {"skiplast", skip_last},
1471     {"size", size},
1472     {"trim", trim},
1473     {"trimend", trim_end},
1474     {"unique", unique},
1475 };
1476 
1477 int main(int argc, char** argv) {
1478     cin.tie(NULL);
1479     ios_base::sync_with_stdio(false);
1480 
1481     argc--;
1482     argv++;
1483 
1484     if (argc < 1) {
1485         help(argc, argv);
1486         return 0;
1487     }
1488 
1489     string key;
1490     string name = argv[0];
1491     key.append(name);
1492     key.erase(remove(key.begin(), key.end(), '-'), key.end());
1493     key.erase(remove(key.begin(), key.end(), '_'), key.end());
1494 
1495     if (tool_aliases.find(name) != tool_aliases.end()) {
1496         key = tool_aliases[name];
1497     }
1498     if (name2tool.find(key) == name2tool.end()) {
1499         stringstream msg;
1500         msg << name << ": no such tool";
1501         show_error(msg.str());
1502         return 1;
1503     }
1504 
1505     try {
1506         auto tool = name2tool[key];
1507         return tool(argc, argv);
1508     } catch (...) {
1509         show_error("generic error");
1510         return 1;
1511     }
1512 }