File: minibox.cpp 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2024 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Minibox is a busybox/toybox-like app which has several small tools in it. You 27 simply run it with a specific tool to run as the first argument, without any 28 dashes in front of it. Running minibox without arguments will show a help 29 message, which in turn shows all the tools/names you can use. 30 31 You can build the minibox app by running the command below, which should take 32 several seconds to complete: 33 34 g++ -Wall -s -O2 -o minibox minibox.cpp 35 */ 36 37 const auto default_tabstop = 4; 38 const auto default_breathe_period = 5; 39 40 const char* gray_style = "\x1b[38;5;248m"; 41 // const char* green_style = "\x1b[38;5;26m"; 42 const char* green_style = "\x1b[38;5;29m"; 43 const char* red_style = "\x1b[38;5;1m"; 44 45 // const char* error_style = "\x1b[41m\x1b[97m"; 46 const char* error_style = "\x1b[31m"; 47 48 #include <algorithm> 49 #include <chrono> 50 #include <cmath> 51 #include <cstdint> 52 #include <cstring> 53 #include <iostream> 54 #include <map> 55 #include <regex> 56 #include <set> 57 #include <sstream> 58 #include <string> 59 #include <thread> 60 61 using namespace std; 62 63 map<string, string> name2help = { 64 {"args", "args [args...]\n\nShow all arguments given, one per output line"}, 65 { 66 "avoid", 67 "avoid [regexes...]\n\n" 68 "Ignore all lines case-sensitively matching any of the JS-style regular\n" 69 "expressions given", 70 }, 71 {"begin", "begin [args...]\n\nPrecede input lines with the args/lines given"}, 72 {"begintsv", "begintsv [items...]\n\nPrecede input lines with a single TSV line"}, 73 {"bh", "bh [every = 5]\n\nAdd an extra emtpy line after the 1st, then every few"}, 74 {"bl", "bl [every = 5]\n\nAdd an extra emtpy line every few"}, 75 {"blow", "blow [tabstop = 4]\n\nBlow/expand tabs into runs of spaces"}, 76 {"book", "book [page height]\n\nLay out lines into 2 columns, just like a book"}, 77 {"choplf", "choplf\n\nIgnore the last line-feed from the input, if present"}, 78 {"delay", "delay [seconds = 1.0]\n\nWait some time before emitting each line"}, 79 { 80 "drop", 81 "drop [what...]\n\n" 82 "Ignore all occurrences of all the regular expressions given, in the\n" 83 "order given", 84 }, 85 {"first", "first [lines = 1]\n\nLimit output up to the first few input lines"}, 86 {"gbm", "gbm [good...] [bad...] [meh...]\n\nANSI-style lines by regex-matches"}, 87 {"help", "help\n\nShow a help message about this app"}, 88 { 89 "iavoid", 90 "iavoid [regexes...]\n\n" 91 "Ignore all lines case-insensitively matching any of the JS-style regular\n" 92 "expressions given", 93 }, 94 { 95 "idrop", 96 "idrop [what...]\n\n" 97 "Ignore all case-insensitively-matched occurrences of all the regular\n" 98 "expressions given, in the order given", 99 }, 100 { 101 "imatch", 102 "imatch [regexes...]\n\n" 103 "Only keep lines case-insensitively matching any of the JS-style regular\n" 104 "expressions given", 105 }, 106 {"last", "last [lines = 1]\n\nLimit output up to the last few input lines"}, 107 {"leak", "leak\n\nHelp debug pipes, by copying input lines to stderr and stdout"}, 108 { 109 "lines", 110 "lines\n\n" 111 "Ignore trailing carriage returns, making sure the last line ends\n" 112 "with a line-feed, whether the input ended with one or not", 113 }, 114 { 115 "lineup", 116 "lineup [max = 0]\n\n" 117 "Join lines into TSV lines up to the number of items given; when\n" 118 "not given a max value, or when given 0 or a negative value, join\n" 119 "all input lines into a single TSV output line", 120 }, 121 {"links", "links\n\nFind all hyperlinks-like substrings from the input"}, 122 {"lower", "lower\n\nASCII-lowercase all input lines"}, 123 { 124 "match", 125 "match [regexes...]\n\n" 126 "Only keep lines case-sensitively matching any of the JS-style regular\n" 127 "expressions given", 128 }, 129 {"n", "n [start = 1]\n\nNumber all input lines"}, 130 {"nn", "nn\n\nNice Numbers makes long numbers easier to read via ANSI-styles"}, 131 {"nothing", "nothing\n\nRead/write nothing, effectively doing nothing"}, 132 {"plain", "plain\n\nIgnore all ANSI-style sequences"}, 133 {"primes", "primes [count]\n\nShow the first few prime numbers"}, 134 {"size", "size\n\nCount input bytes"}, 135 {"skip", "skip [lines = 1]\n\nIgnore the first few input lines"}, 136 {"skiplast", "skiplast [lines = 1]\n\nIgnore the last few input lines"}, 137 {"trim", "trim\n\nIgnore leading/trailing spaces on all lines"}, 138 {"trimend", "trimend\n\nIgnore trailing spaces on all lines"}, 139 {"unique", "unique\n\nAvoid emitting the same input line more than once"}, 140 }; 141 142 int make_regexes(int argc, char** argv, vector<regex>& res, bool case_sens) { 143 size_t errors = 0; 144 const auto dialect = regex_constants::ECMAScript; 145 const auto options = dialect | (case_sens ? 0 : regex_constants::icase); 146 147 for (int i = 1; i < argc; i++) { 148 try { 149 regex expr(argv[i], options); 150 res.push_back(expr); 151 } catch (regex_error& e) { 152 const auto es = error_style; 153 cerr << es << e.code() << ": " << e.what() << "\x1b[0m" << endl; 154 errors++; 155 } 156 } 157 158 return errors; 159 } 160 161 uint64_t count_runes(const string& s) { 162 uint64_t n = 0; 163 for (auto c : s) { 164 n += ((c & 0xc0) != 0x80); 165 } 166 return n; 167 } 168 169 void de_bom(string &s) { 170 // s.starts_with("\xef\xbb\xbf") 171 if (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') { 172 s.erase(0, 3); 173 } 174 } 175 176 void no_cr(string &s) { 177 s.erase(remove(s.begin(), s.end(), '\r'), s.end()); 178 } 179 180 istream& get_unix_line(istream& src, string& dest) { 181 getline(src, dest); 182 no_cr(dest); 183 return src; 184 } 185 186 void left_trim(string &s) { 187 auto trimmed = find_if(s.begin(), s.end(), [](char c) { 188 return !isspace<char>(c, locale::classic()); 189 }); 190 s.erase(s.begin(), trimmed); 191 } 192 193 bool matches_any(string& s, const vector<regex>& expressions) { 194 for (regex e : expressions) { 195 if (regex_search(s, e)) { 196 return true; 197 } 198 } 199 return false; 200 } 201 202 void remove_all(string& dest, char* ignore) { 203 auto len = strlen(ignore); 204 auto pos = string::npos; 205 while (1) { 206 pos = dest.find(ignore); 207 if (pos == string::npos) { 208 return; 209 } 210 dest.erase(pos, len); 211 } 212 } 213 214 void right_trim(string &s) { 215 auto trimmed = find_if(s.rbegin(), s.rend(), [](char c) { 216 return !isspace<char>(c, locale::classic()); 217 }); 218 s.erase(trimmed.base(), s.end()); 219 } 220 221 void show_error(const string& msg) { 222 cerr << error_style << msg << "\x1b[0m" << endl; 223 } 224 225 inline int no_args_expected() { 226 show_error("no args expected"); 227 return 1; 228 } 229 230 /* 231 The next few funcs allow changing the auto-flushing/line-buffering stdout 232 behavior across tools. 233 */ 234 235 inline void output_line(const char* s) { 236 cout << s << endl; 237 } 238 239 inline void output_line(uint64_t n) { 240 cout << n << endl; 241 } 242 243 inline void output_line(const string& s) { 244 cout << s << endl; 245 } 246 247 inline void output_styled_line(const string& style, const string &line) { 248 cout << style << line << "\x1b[0m" << endl; 249 } 250 251 inline void end_output_line() { 252 cout << endl; 253 } 254 255 // args emits each string given to it on its own output line, ignoring any 256 // input 257 int args(int argc, char** argv) { 258 for (int i = 1; i < argc; i++) { 259 output_line(argv[i]); 260 } 261 return 0; 262 } 263 264 int avoid(int argc, char** argv) { 265 vector<regex> expressions; 266 size_t errors = make_regexes(argc, argv, expressions, true); 267 if (errors > 0) { 268 return 1; 269 } 270 271 string line; 272 while (getline(cin, line)) { 273 if (!matches_any(line, expressions)) { 274 output_line(line); 275 if (cout.eof()) { 276 return 0; 277 } 278 } 279 } 280 return 0; 281 } 282 283 // begin emits the few strings given as their own lines, before emitting back 284 // all input lines 285 int begin(int argc, char** argv) { 286 for (int i = 1; i < argc; i++) { 287 output_line(argv[i]); 288 } 289 290 string line; 291 while (getline(cin, line)) { 292 output_line(line); 293 if (cout.eof()) { 294 return 0; 295 } 296 } 297 return 0; 298 } 299 300 // begin_tsv emits a line of tab-separated values (TSV), before emitting back 301 // all input lines 302 int begin_tsv(int argc, char** argv) { 303 for (int i = 1; i < argc; i++) { 304 if (i > 1) { 305 cout << '\t'; 306 } 307 cout << argv[i]; 308 } 309 if (argc > 1) { 310 end_output_line(); 311 } 312 313 string line; 314 while (getline(cin, line)) { 315 output_line(line); 316 if (cout.eof()) { 317 return 0; 318 } 319 } 320 return 0; 321 } 322 323 // bh (breathe header) adds an extra empty line after the first/header line, 324 // and then adds a single extra empty line every few 325 int bh(int argc, char** argv) { 326 string line; 327 int64_t every = default_breathe_period; 328 329 if (argc > 1) { 330 try { 331 every = stol(argv[1]); 332 } catch (...) { } 333 } 334 335 if (!getline(cin, line)) { 336 return 0; 337 } 338 output_line(line); 339 340 auto i = 0; 341 while (getline(cin, line)) { 342 if (every > 0 && i % every == 0) { 343 end_output_line(); 344 } 345 346 output_line(line); 347 if (cout.eof()) { 348 return 0; 349 } 350 351 i++; 352 } 353 354 return 0; 355 } 356 357 358 // bl (breathe lines) adds single extra empty lines every few 359 int bl(int argc, char** argv) { 360 string line; 361 int64_t every = default_breathe_period; 362 363 if (argc > 1) { 364 try { 365 every = stol(argv[1]); 366 } catch (...) { } 367 } 368 369 auto i = 0; 370 while (getline(cin, line)) { 371 if (every > 0 && i % every == 0 && i > 0) { 372 end_output_line(); 373 } 374 375 output_line(line); 376 if (cout.eof()) { 377 return 0; 378 } 379 380 i++; 381 } 382 383 return 0; 384 } 385 386 // blow blows/expands tabs into runs of spaces 387 int blow(int argc, char** argv) { 388 string line; 389 int64_t tabstop = default_tabstop; 390 391 if (argc > 1) { 392 try { 393 tabstop = stol(argv[1]); 394 } catch (...) { } 395 } 396 397 if (tabstop < 1) { 398 while (getline(cin, line)) { 399 output_line(line); 400 if (cout.eof()) { 401 return 0; 402 } 403 } 404 return 0; 405 } 406 407 while (getline(cin, line)) { 408 int64_t i = 0; 409 410 for (auto c : line) { 411 if (c != '\t') { 412 i += ((c & 0xc0) != 0x80); 413 cout << c; 414 continue; 415 } 416 417 auto spaces = tabstop - i % tabstop; 418 i += spaces; 419 for (auto j = 0; j < spaces; j++) { 420 cout << ' '; 421 } 422 } 423 424 end_output_line(); 425 } 426 427 return 0; 428 } 429 430 // book lays out input lines on 2 columns, the same way books do it 431 int book(int argc, char** argv) { 432 if (argc != 2) { 433 show_error("expected 1 argument"); 434 return 1; 435 } 436 437 uint64_t page_height = 0; 438 try { 439 page_height = stol(argv[1]); 440 } catch (...) { 441 show_error("invalid page-height"); 442 return 1; 443 } 444 445 if (page_height < 2) { 446 show_error("page height can't be less than 2"); 447 return 1; 448 } 449 450 string line; 451 vector<string> lines; 452 while (getline(cin, line)) { 453 lines.push_back(line); 454 } 455 456 auto nlines = lines.size(); 457 auto inner = page_height - 1; 458 459 auto side = 0; 460 uint64_t widths[2] = {0, 0}; 461 462 for (uint64_t i = 0; i < nlines; i += inner) { 463 for (uint64_t j = i; j < nlines && j < i + inner; j++) { 464 auto w = count_runes(lines[j]); 465 if (widths[side] < w) { 466 widths[side] = w; 467 } 468 } 469 470 side = 1 - side; 471 } 472 473 for (uint64_t i = 0; i < nlines; i += 2*inner) { 474 if (i > 0) { 475 auto rest = (widths[1] > 0) ? (3 + widths[1]) : 2; 476 auto maxw = widths[0] + rest; 477 for (uint64_t j = 0; j < maxw; j++) { 478 // cout << '-'; 479 cout << "·"; 480 } 481 end_output_line(); 482 } 483 484 for (auto j = i; j < nlines && j < i + inner; j++) { 485 auto l = lines[j]; 486 auto w = count_runes(l); 487 auto r = (j + inner < nlines) ? lines[j + inner] : string(); 488 489 cout << l; 490 if (widths[0] > w) { 491 for (uint64_t j = 0; j < widths[0] - w; j++) { 492 cout << ' '; 493 } 494 } 495 496 w = r.size(); 497 if (w == 0) { 498 // cout << " |"; 499 cout << " █"; 500 } else { 501 // cout << " | "; 502 cout << " █ "; 503 } 504 505 output_line(r); 506 if (cout.eof()) { 507 return 0; 508 } 509 } 510 } 511 512 return 0; 513 } 514 515 // chop_lf ignore the last line-feed from the input, if present 516 int chop_lf(int argc, char**) { 517 if (argc > 1) { 518 return no_args_expected(); 519 } 520 521 auto i = 0; 522 string line; 523 while (getline(cin, line)) { 524 if (i > 0) { 525 end_output_line(); 526 } 527 cout << line; 528 i++; 529 } 530 531 return 0; 532 } 533 534 // delay waits the given number of seconds before emitting back each line 535 // from the input 536 int delay(int argc, char** argv) { 537 double seconds = 1.0; 538 539 if (argc > 1) { 540 try { 541 seconds = stof(argv[1]); 542 } catch (...) { } 543 } 544 545 if (isnan(seconds) || isinf(seconds)) { 546 show_error("invalid number"); 547 return 1; 548 } 549 550 if (seconds < 0) { 551 seconds = 0; 552 } 553 554 string line; 555 auto ms = (int)(1000 * seconds); 556 557 while (getline(cin, line)) { 558 this_thread::sleep_for(chrono::milliseconds(ms)); 559 560 output_line(line); 561 if (cout.eof()) { 562 return 0; 563 } 564 } 565 566 return 0; 567 } 568 569 // drop ignores all occurrences of all the regular expressions given, in the 570 // order given; regex matches are case-sensitive 571 int drop(int argc, char** argv) { 572 vector<regex> expressions; 573 size_t errors = make_regexes(argc, argv, expressions, true); 574 if (errors > 0) { 575 return 1; 576 } 577 578 string line; 579 while (getline(cin, line)) { 580 for (regex e : expressions) { 581 if (!regex_search(line, e)) { 582 continue; 583 } 584 585 string s = regex_replace(line, e, ""); 586 line.clear(); 587 line.append(s); 588 } 589 590 output_line(line); 591 if (cout.eof()) { 592 return 0; 593 } 594 } 595 596 return 0; 597 } 598 599 // first limits input up to its first few lines 600 int first(int argc, char** argv) { 601 string line; 602 int64_t max = 1; 603 604 if (argc > 1) { 605 try { 606 max = stol(argv[1]); 607 } catch (...) { } 608 } 609 610 if (max < 1) { 611 return 0; 612 } 613 614 auto i = 0; 615 while (getline(cin, line)) { 616 output_line(line); 617 if (cout.eof()) { 618 return 0; 619 } 620 i++; 621 622 if (i >= max) { 623 return 0; 624 } 625 } 626 627 return 0; 628 } 629 630 // gbm (good, bad, meh) ANSI-styles/colors lines by regex-matching 631 int gbm(int argc, char** argv) { 632 string line; 633 regex good, bad, meh; 634 635 const string good_style = green_style; 636 const string bad_style = red_style; 637 const string meh_style = gray_style; 638 639 switch (argc) { 640 case 2: 641 good = regex(argv[1]); 642 643 while (getline(cin, line)) { 644 if (cout.eof()) { 645 return 0; 646 } 647 648 if (regex_search(line, good)) { 649 output_styled_line(good_style, line); 650 continue; 651 } 652 653 output_line(line); 654 } 655 656 return 0; 657 658 case 3: 659 good = regex(argv[1]); 660 bad = regex(argv[2]); 661 662 while (getline(cin, line)) { 663 if (cout.eof()) { 664 return 0; 665 } 666 667 if (regex_search(line, good)) { 668 output_styled_line(good_style, line); 669 continue; 670 } 671 if (regex_search(line, bad)) { 672 output_styled_line(bad_style, line); 673 continue; 674 } 675 676 output_line(line); 677 } 678 679 return 0; 680 681 case 4: 682 good = regex(argv[1]); 683 bad = regex(argv[2]); 684 meh = regex(argv[3]); 685 686 while (getline(cin, line)) { 687 if (cout.eof()) { 688 return 0; 689 } 690 691 if (regex_search(line, good)) { 692 output_styled_line(good_style, line); 693 continue; 694 } 695 if (regex_search(line, bad)) { 696 output_styled_line(bad_style, line); 697 continue; 698 } 699 if (regex_search(line, meh)) { 700 output_styled_line(meh_style, line); 701 continue; 702 } 703 704 output_line(line); 705 } 706 707 return 0; 708 709 default: 710 show_error("only up to 3 args are supported"); 711 return 1; 712 } 713 } 714 715 extern map<string, string> tool_aliases; 716 extern map<string, int(*)(int, char**)> name2tool; 717 718 // help_general handles the no-arguments case for func help 719 int help_general(int, char**) { 720 output_line("minibox [tool] [args...]"); 721 end_output_line(); 722 output_line("This is a busybox/toybox-like app which has several small"); 723 output_line("tools in it."); 724 725 vector<string> keys; 726 for (auto kv : tool_aliases) { 727 keys.push_back(kv.first); 728 } 729 sort(keys.begin(), keys.end()); 730 731 end_output_line(); 732 end_output_line(); 733 output_line("Aliases"); 734 end_output_line(); 735 736 for (auto k : keys) { 737 cout << k << '\t' << tool_aliases[k] << endl; 738 } 739 740 keys.clear(); 741 for (auto kv : name2tool) { 742 keys.push_back(kv.first); 743 } 744 sort(keys.begin(), keys.end()); 745 746 end_output_line(); 747 end_output_line(); 748 output_line("Tools"); 749 750 for (auto k : keys) { 751 end_output_line(); 752 end_output_line(); 753 output_line(name2help[k]); 754 if (cout.eof()) { 755 return 0; 756 } 757 } 758 759 return 0; 760 } 761 762 int help(int argc, char** argv) { 763 if (argc <= 1) { 764 return help_general(argc, argv); 765 } 766 767 auto nerr = 0; 768 769 for (auto i = 1; i < argc; i++) { 770 auto name = argv[i]; 771 if (name2help.find(name) == name2help.end()) { 772 stringstream msg; 773 msg << name << ": no help message found"; 774 show_error(msg.str()); 775 nerr++; 776 continue; 777 } 778 779 if (i > 1) { 780 end_output_line(); 781 end_output_line(); 782 } 783 output_line(name2help[name]); 784 } 785 786 if (nerr > 0) { 787 return 1; 788 } 789 return 0; 790 } 791 792 int iavoid(int argc, char** argv) { 793 vector<regex> expressions; 794 size_t errors = make_regexes(argc, argv, expressions, false); 795 if (errors > 0) { 796 return 1; 797 } 798 799 string line; 800 while (getline(cin, line)) { 801 if (!matches_any(line, expressions)) { 802 output_line(line); 803 if (cout.eof()) { 804 return 0; 805 } 806 } 807 } 808 return 0; 809 } 810 811 // idrop ignores all case-insensitively-matched occurrences of all the regular 812 // expressions given, in the order given 813 int idrop(int argc, char** argv) { 814 vector<regex> expressions; 815 size_t errors = make_regexes(argc, argv, expressions, false); 816 if (errors > 0) { 817 return 1; 818 } 819 820 string line; 821 while (getline(cin, line)) { 822 for (regex e : expressions) { 823 if (!regex_search(line, e)) { 824 continue; 825 } 826 827 string s = regex_replace(line, e, ""); 828 line.clear(); 829 line.append(s); 830 } 831 832 output_line(line); 833 if (cout.eof()) { 834 return 0; 835 } 836 } 837 838 return 0; 839 } 840 841 int imatch(int argc, char** argv) { 842 if (argc == 1) { 843 return 0; 844 } 845 846 vector<regex> expressions; 847 size_t errors = make_regexes(argc, argv, expressions, false); 848 if (errors > 0) { 849 return 1; 850 } 851 852 string line; 853 while (getline(cin, line)) { 854 if (matches_any(line, expressions)) { 855 output_line(line); 856 if (cout.eof()) { 857 return 0; 858 } 859 } 860 } 861 return 0; 862 } 863 864 // last limits input up to its last few lines 865 int last(int argc, char** argv) { 866 string line; 867 vector<string> latest; 868 int64_t max = 1; 869 870 if (argc > 1) { 871 try { 872 max = stol(argv[1]); 873 } catch (...) { } 874 } 875 876 if (max < 1) { 877 return 0; 878 } 879 880 size_t i = 0; 881 while (getline(cin, line)) { 882 if ((int64_t)latest.size() < max) { 883 latest.push_back(line); 884 } else { 885 latest[i] = line; 886 } 887 i = (i + 1) % max; 888 } 889 890 for (size_t j = i; j < latest.size(); j++) { 891 output_line(latest[j]); 892 if (cout.eof()) { 893 return 0; 894 } 895 } 896 for (size_t j = 0; j < i; j++) { 897 output_line(latest[j]); 898 if (cout.eof()) { 899 return 0; 900 } 901 } 902 return 0; 903 } 904 905 // leak helps debug pipes, by copying all input lines both to stderr and to 906 // stdout 907 int leak(int argc, char**) { 908 if (argc > 1) { 909 return no_args_expected(); 910 } 911 912 string line; 913 while (getline(cin, line)) { 914 cerr << line << endl; 915 output_line(line); 916 if (cout.eof()) { 917 return 0; 918 } 919 } 920 return 0; 921 } 922 923 // lines ignores trailing carriage-returns from input lines, and ensures the 924 // last line ends with a line-feed, whether the input had that or not 925 int lines(int argc, char**) { 926 if (argc > 1) { 927 return no_args_expected(); 928 } 929 930 string line; 931 if (!get_unix_line(cin, line)) { 932 return 0; 933 } 934 de_bom(line); 935 output_line(line); 936 937 while (get_unix_line(cin, line)) { 938 output_line(line); 939 if (cout.eof()) { 940 return 0; 941 } 942 } 943 return 0; 944 } 945 946 // line_up joins input lines via tabs, up to the number given: whenever that 947 // number is exceeded, a new output line starts; when not given a number, or 948 // when that number is 0 or negative, all input lines are tab-joined into a 949 // single output line 950 int line_up(int argc, char** argv) { 951 string line; 952 int64_t max = 0; 953 954 if (argc > 1) { 955 try { 956 max = stol(argv[1]); 957 } catch (...) { } 958 } 959 960 auto i = 0; 961 while (getline(cin, line)) { 962 if (i >= max && max > 0) { 963 i = 0; 964 end_output_line(); 965 } else if (i > 0) { 966 cout << '\t'; 967 if (cout.eof()) { 968 return 0; 969 } 970 } 971 972 cout << line; 973 i++; 974 } 975 976 if (i > 0) { 977 end_output_line(); 978 } 979 return 0; 980 } 981 982 // links gets all hyperlink-type substrings from the input, each match shown 983 // on its own output line 984 int links(int argc, char**) { 985 if (argc > 1) { 986 return no_args_expected(); 987 } 988 989 string line; 990 auto pat = regex("https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*"); 991 992 while (getline(cin, line)) { 993 auto iter = sregex_iterator(line.begin(), line.end(), pat); 994 for (; iter != sregex_iterator(); iter++) { 995 output_line(iter->str()); 996 if (cout.eof()) { 997 return 0; 998 } 999 } 1000 } 1001 1002 return 0; 1003 } 1004 1005 // lower ASCII-lowercases all symbols in all lines 1006 int lower(int argc, char**) { 1007 if (argc > 1) { 1008 return no_args_expected(); 1009 } 1010 1011 string line; 1012 while (getline(cin, line)) { 1013 transform(line.begin(), line.end(), line.begin(), [](char c) { 1014 return tolower(c); 1015 }); 1016 1017 output_line(line); 1018 if (cout.eof()) { 1019 return 0; 1020 } 1021 } 1022 1023 return 0; 1024 } 1025 1026 int match(int argc, char** argv) { 1027 if (argc == 1) { 1028 return 0; 1029 } 1030 1031 vector<regex> expressions; 1032 size_t errors = make_regexes(argc, argv, expressions, true); 1033 if (errors > 0) { 1034 return 1; 1035 } 1036 1037 string line; 1038 while (getline(cin, line)) { 1039 if (matches_any(line, expressions)) { 1040 output_line(line); 1041 if (cout.eof()) { 1042 return 0; 1043 } 1044 } 1045 } 1046 return 0; 1047 } 1048 1049 // n numbers lines using the optional starting counter given, which is 1 by 1050 // default; each output line starts with the current counter, followed by a 1051 // tab, ending with the original input line 1052 int n(int argc, char** argv) { 1053 string line; 1054 int64_t n = 1; 1055 1056 if (argc > 1) { 1057 try { 1058 n = stol(argv[1]); 1059 } catch (...) { } 1060 } 1061 1062 while (getline(cin, line)) { 1063 cout << n << '\t' << line << endl; 1064 n++; 1065 } 1066 1067 return 0; 1068 } 1069 1070 // style_digits helps func nn do its job 1071 void style_digits(string digits) { 1072 auto l = digits.length(); 1073 if (l < 4) { 1074 cout << digits; 1075 return; 1076 } 1077 1078 auto alt = false; 1079 auto n = 3 - (l % 3); 1080 1081 for (auto c : digits) { 1082 cout << c; 1083 n++; 1084 if (n % 3 == 0) { 1085 alt = !alt; 1086 cout << (alt ? gray_style : "\x1b[0m"); 1087 } 1088 } 1089 1090 if (alt) { 1091 cout << "\x1b[0m"; 1092 } 1093 } 1094 1095 // nn stands for `nice numbers`, alternating ANSI-styles for long-enough runs 1096 // of digits, the result being easier to read/parse visually, especially on 1097 // output full of such long numbers, such as with tables 1098 int nn(int argc, char**) { 1099 if (argc > 1) { 1100 return no_args_expected(); 1101 } 1102 1103 string line; 1104 stringstream digits; 1105 auto in_digits = false; 1106 1107 while (getline(cin, line)) { 1108 for (auto c : line) { 1109 auto is_digit = '0' <= c && c <= '9'; 1110 1111 if (in_digits) { 1112 if (!is_digit) { 1113 in_digits = false; 1114 style_digits(digits.str()); 1115 digits.str(string()); 1116 cout << c; 1117 } else{ 1118 digits << c; 1119 } 1120 continue; 1121 } 1122 1123 if (is_digit) { 1124 in_digits = true; 1125 digits << c; 1126 continue; 1127 } 1128 1129 cout << c; 1130 } 1131 1132 if (in_digits) { 1133 in_digits = false; 1134 style_digits(digits.str()); 1135 digits.str(string()); 1136 } 1137 1138 end_output_line(); 1139 if (cout.eof()) { 1140 return 0; 1141 } 1142 } 1143 1144 return 0; 1145 } 1146 1147 // nothing reads nothing and writes, effectively doing nothing 1148 int nothing(int argc, char**) { 1149 if (argc > 1) { 1150 return no_args_expected(); 1151 } 1152 return 0; 1153 } 1154 1155 // plain ignores all ANSI-style sequences, leaving actual `plain` plain-text 1156 int plain(int argc, char**) { 1157 if (argc > 1) { 1158 return no_args_expected(); 1159 } 1160 1161 string line; 1162 1163 while (getline(cin, line)) { 1164 int state = 0; 1165 for (auto c : line) { 1166 switch (state) { 1167 case 0: 1168 if (c == '\x1b') { 1169 state = 1; 1170 } else { 1171 cout << c; 1172 } 1173 break; 1174 1175 case 1: 1176 if (c == '[') { 1177 state = 2; 1178 } else { 1179 cout << '\x1b'; 1180 cout << c; 1181 state = 1; 1182 } 1183 break; 1184 1185 case 2: 1186 if ('A' <= c && c <= 'Z') { 1187 state = 0; 1188 } else if ('a' <= c && c <= 'z') { 1189 state = 0; 1190 } 1191 break; 1192 } 1193 } 1194 1195 end_output_line(); 1196 if (cout.eof()) { 1197 return 0; 1198 } 1199 } 1200 1201 return 0; 1202 } 1203 1204 // primes shows the first few prime numbers, one per output line 1205 int primes(int argc, char** argv) { 1206 uint64_t count = 1000000; 1207 1208 if (argc > 1) { 1209 try { 1210 count = stol(argv[1]); 1211 } catch (...) { } 1212 } 1213 1214 for (uint64_t n = 3; count > 0; n += 2) { 1215 uint64_t max = sqrt((double)n); 1216 for (uint64_t div = 3; div <= max; div += 2) { 1217 if (n % div == 0) { 1218 goto skip; 1219 } 1220 } 1221 1222 output_line(n); 1223 if (cout.eof()) { 1224 return 0; 1225 } 1226 count--; 1227 1228 skip:; 1229 } 1230 1231 return 0; 1232 } 1233 1234 // skip ignores up to the given number of input lines, then emitting the rest 1235 int skip(int argc, char** argv) { 1236 string line; 1237 int64_t skip = 1; 1238 1239 if (argc > 1) { 1240 try { 1241 skip = stol(argv[1]); 1242 } catch (...) { } 1243 } 1244 1245 auto i = 0; 1246 while (getline(cin, line)) { 1247 if (i < skip) { 1248 i++; 1249 continue; 1250 } 1251 1252 output_line(line); 1253 if (cout.eof()) { 1254 return 0; 1255 } 1256 i++; 1257 } 1258 1259 return 0; 1260 } 1261 1262 // skip_last emits all but the last few input lines 1263 int skip_last(int argc, char** argv) { 1264 string line; 1265 vector<string> latest; 1266 int64_t max = 1; 1267 1268 if (argc > 1) { 1269 try { 1270 max = stol(argv[1]); 1271 } catch (...) { } 1272 } 1273 1274 if (max < 1) { 1275 while (getline(cin, line)) { 1276 output_line(line); 1277 if (cout.eof()) { 1278 return 0; 1279 } 1280 } 1281 return 0; 1282 } 1283 1284 size_t i = 0; 1285 while (getline(cin, line)) { 1286 if ((int64_t)latest.size() < max) { 1287 latest.push_back(line); 1288 } else { 1289 output_line(latest[i]); 1290 if (cout.eof()) { 1291 return 0; 1292 } 1293 latest[i] = line; 1294 } 1295 1296 i = (i + 1) % max; 1297 } 1298 1299 return 0; 1300 } 1301 1302 int size(int argc, char**) { 1303 if (argc > 1) { 1304 return no_args_expected(); 1305 } 1306 1307 uint64_t n = 0; 1308 freopen(0, "rb", stdin); 1309 while (getchar_unlocked() != EOF) { 1310 n++; 1311 } 1312 output_line(n); 1313 return 0; 1314 } 1315 1316 // teletype simulates the cadence of old teletype machines 1317 int teletype(int argc, char**) { 1318 if (argc > 1) { 1319 return no_args_expected(); 1320 } 1321 1322 string line; 1323 while (getline(cin, line)) { 1324 auto l = line.size(); 1325 if (l == 0) { 1326 end_output_line(); 1327 continue; 1328 } 1329 1330 end_output_line(); 1331 } 1332 1333 return 0; 1334 } 1335 1336 // trim ignores leading/trailing spaces in all lines 1337 int trim(int argc, char**) { 1338 if (argc > 1) { 1339 return no_args_expected(); 1340 } 1341 1342 string line; 1343 while (getline(cin, line)) { 1344 // no_cr(line); 1345 left_trim(line); 1346 right_trim(line); 1347 1348 output_line(line); 1349 if (cout.eof()) { 1350 return 0; 1351 } 1352 } 1353 1354 return 0; 1355 } 1356 1357 // trim_end ignores trailing spaces in all lines 1358 int trim_end(int argc, char**) { 1359 if (argc > 1) { 1360 return no_args_expected(); 1361 } 1362 1363 string line; 1364 while (getline(cin, line)) { 1365 // no_cr(line); 1366 right_trim(line); 1367 1368 output_line(line); 1369 if (cout.eof()) { 1370 return 0; 1371 } 1372 } 1373 1374 return 0; 1375 } 1376 1377 // unique avoids emitting the same input line more than once 1378 int unique(int argc, char**) { 1379 if (argc > 1) { 1380 return no_args_expected(); 1381 } 1382 1383 string line; 1384 set<string> seen; 1385 1386 while (getline(cin, line)) { 1387 if (seen.find(line) != seen.end()) { 1388 continue; 1389 } 1390 1391 output_line(line); 1392 if (cout.eof()) { 1393 return 0; 1394 } 1395 seen.insert(line); 1396 } 1397 1398 return 0; 1399 } 1400 1401 map<string, string> tool_aliases = { 1402 {"arguments", "args"}, 1403 {"blowtabs", "blow"}, 1404 {"breathe", "bl"}, 1405 {"breatheheader", "bh"}, 1406 {"breathelines", "bl"}, 1407 {"butlast", "skiplast"}, 1408 {"catl", "lines"}, 1409 {"countbytes", "size"}, 1410 {"dedup", "unique"}, 1411 {"dropall", "drop"}, 1412 {"erase", "drop"}, 1413 {"eraseall", "drop"}, 1414 {"expand", "blow"}, 1415 {"expandtabs", "blow"}, 1416 {"final", "last"}, 1417 {"idropall", "idrop"}, 1418 {"ierase", "idrop"}, 1419 {"ieraseall", "idrop"}, 1420 {"nil", "nothing"}, 1421 {"null", "nothing"}, 1422 {"pretsv", "begintsv"}, 1423 {"rstrip", "trimend"}, 1424 {"rtrim", "trimend"}, 1425 {"skipfinal", "skiplast"}, 1426 {"strip", "trim"}, 1427 {"stripend", "trimend"}, 1428 {"stripspace", "trim"}, 1429 {"stripspaces", "trim"}, 1430 {"striptrail", "trimend"}, 1431 {"striptrails", "trimend"}, 1432 {"trimspace", "trim"}, 1433 {"trimspaces", "trim"}, 1434 {"trimtrail", "trimend"}, 1435 {"trimtrails", "trimend"}, 1436 {"unixify", "lines"}, 1437 }; 1438 1439 map<string, int(*)(int, char**)> name2tool = { 1440 {"args", args}, 1441 {"avoid", avoid}, 1442 {"begin", begin}, 1443 {"begintsv", begin_tsv}, 1444 {"bh", bh}, 1445 {"bl", bl}, 1446 {"blow", blow}, 1447 {"book", book}, 1448 {"choplf", chop_lf}, 1449 {"delay", delay}, 1450 {"drop", drop}, 1451 {"first", first}, 1452 {"gbm", gbm}, 1453 {"help", help}, 1454 {"iavoid", iavoid}, 1455 {"idrop", idrop}, 1456 {"imatch", imatch}, 1457 {"last", last}, 1458 {"leak", leak}, 1459 {"lines", lines}, 1460 {"lineup", line_up}, 1461 {"links", links}, 1462 {"lower", lower}, 1463 {"match", match}, 1464 {"n", n}, 1465 {"nn", nn}, 1466 {"nothing", nothing}, 1467 {"plain", plain}, 1468 {"primes", primes}, 1469 {"skip", skip}, 1470 {"skiplast", skip_last}, 1471 {"size", size}, 1472 {"trim", trim}, 1473 {"trimend", trim_end}, 1474 {"unique", unique}, 1475 }; 1476 1477 int main(int argc, char** argv) { 1478 cin.tie(NULL); 1479 ios_base::sync_with_stdio(false); 1480 1481 argc--; 1482 argv++; 1483 1484 if (argc < 1) { 1485 help(argc, argv); 1486 return 0; 1487 } 1488 1489 string key; 1490 string name = argv[0]; 1491 key.append(name); 1492 key.erase(remove(key.begin(), key.end(), '-'), key.end()); 1493 key.erase(remove(key.begin(), key.end(), '_'), key.end()); 1494 1495 if (tool_aliases.find(name) != tool_aliases.end()) { 1496 key = tool_aliases[name]; 1497 } 1498 if (name2tool.find(key) == name2tool.end()) { 1499 stringstream msg; 1500 msg << name << ": no such tool"; 1501 show_error(msg.str()); 1502 return 1; 1503 } 1504 1505 try { 1506 auto tool = name2tool[key]; 1507 return tool(argc, argv); 1508 } catch (...) { 1509 show_error("generic error"); 1510 return 1; 1511 } 1512 }