// File: minibox.cpp
/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Minibox is a busybox/toybox-like app which has several small tools in it. You
simply run it with a specific tool to run as the first argument, without any
dashes in front of it. Running minibox without arguments will show a help
message, which in turn shows all the tools/names you can use.

You can build the minibox app by running the command below, which should take
several seconds to complete:

g++ -Wall -s -O2 -o minibox minibox.cpp
*/

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <locale>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

using namespace std;

const auto default_tabstop = 4;
const auto default_breathe_period = 5;

const char* gray_style = "\x1b[38;5;248m";
// const char* green_style = "\x1b[38;5;26m";
const char* green_style = "\x1b[38;5;29m";
const char* red_style = "\x1b[38;5;1m";

// const char* error_style = "\x1b[41m\x1b[97m";
const char* error_style = "\x1b[31m";

// name2help maps each tool name to its help message: the first line of each
// message is the tool's usage
map<string, string> name2help = {
    {"args", "args [args...]\n\nShow all arguments given, one per output line"},
    {
        "avoid",
        "avoid [regexes...]\n\n"
        "Ignore all lines case-sensitively matching any of the JS-style regular\n"
        "expressions given",
    },
    {"begin", "begin [args...]\n\nPrecede input lines with the args/lines given"},
    {"begintsv", "begintsv [items...]\n\nPrecede input lines with a single TSV line"},
    {"bh", "bh [every = 5]\n\nAdd an extra empty line after the 1st, then every few"},
    {"bl", "bl [every = 5]\n\nAdd an extra empty line every few"},
    {"blow", "blow [tabstop = 4]\n\nBlow/expand tabs into runs of spaces"},
    {"book", "book [page height]\n\nLay out lines into 2 columns, just like a book"},
    {"choplf", "choplf\n\nIgnore the last line-feed from the input, if present"},
    {"delay", "delay [seconds = 1.0]\n\nWait some time before emitting each line"},
    {
        "drop",
        "drop [what...]\n\n"
        "Ignore all occurrences of all the regular expressions given, in the\n"
        "order given",
    },
    {"first", "first [lines = 1]\n\nLimit output up to the first few input lines"},
    {"gbm", "gbm [good...] [bad...] [meh...]\n\nANSI-style lines by regex-matches"},
    {"help", "help\n\nShow a help message about this app"},
    {
        "iavoid",
        "iavoid [regexes...]\n\n"
        "Ignore all lines case-insensitively matching any of the JS-style regular\n"
        "expressions given",
    },
    {
        "idrop",
        "idrop [what...]\n\n"
        "Ignore all case-insensitively-matched occurrences of all the regular\n"
        "expressions given, in the order given",
    },
    {
        "imatch",
        "imatch [regexes...]\n\n"
        "Only keep lines case-insensitively matching any of the JS-style regular\n"
        "expressions given",
    },
    {"last", "last [lines = 1]\n\nLimit output up to the last few input lines"},
    {"leak", "leak\n\nHelp debug pipes, by copying input lines to stderr and stdout"},
    {
        "lines",
        "lines\n\n"
        "Ignore trailing carriage returns, making sure the last line ends\n"
        "with a line-feed, whether the input ended with one or not",
    },
    {
        "lineup",
        "lineup [max = 0]\n\n"
        "Join lines into TSV lines up to the number of items given; when\n"
        "not given a max value, or when given 0 or a negative value, join\n"
        "all input lines into a single TSV output line",
    },
    {"links", "links\n\nFind all hyperlinks-like substrings from the input"},
    {"lower", "lower\n\nASCII-lowercase all input lines"},
    {
        "match",
        "match [regexes...]\n\n"
        "Only keep lines case-sensitively matching any of the JS-style regular\n"
        "expressions given",
    },
    {"n", "n [start = 1]\n\nNumber all input lines"},
    {"nn", "nn\n\nNice Numbers makes long numbers easier to read via ANSI-styles"},
    {"nothing", "nothing\n\nRead/write nothing, effectively doing nothing"},
    {"plain", "plain\n\nIgnore all ANSI-style sequences"},
    {"primes", "primes [count]\n\nShow the first few prime numbers"},
    {"size", "size\n\nCount input bytes"},
    {"skip", "skip [lines = 1]\n\nIgnore the first few input lines"},
    {"skiplast", "skiplast [lines = 1]\n\nIgnore the last few input lines"},
    {"trim", "trim\n\nIgnore leading/trailing spaces on all lines"},
    {"trimend", "trimend\n\nIgnore trailing spaces on all lines"},
    {"unique", "unique\n\nAvoid emitting the same input line more than once"},
};

// make_regexes compiles args 1..argc-1 as ECMAScript regexes, appending the
// ones which compile to `res`; each failure is reported on stderr, and the
// result is the number of args which failed to compile
int make_regexes(int argc, char** argv, vector<regex>& res, bool case_sens) {
    int errors = 0;

    // build the flags using the proper bitmask type, instead of mixing ints
    // and enum values in the same expression
    regex_constants::syntax_option_type options = regex_constants::ECMAScript;
    if (!case_sens) {
        options |= regex_constants::icase;
    }

    for (int i = 1; i < argc; i++) {
        try {
            res.emplace_back(argv[i], options);
        } catch (const regex_error& e) {
            const auto es = error_style;
            cerr << es << e.code() << ": " << e.what() << "\x1b[0m" << endl;
            errors++;
        }
    }

    return errors;
}

// count_runes counts the bytes which aren't UTF-8 continuation bytes, which
// is the number of code points when the string is valid UTF-8
uint64_t count_runes(const string& s) {
    uint64_t runes = 0;
    for (const char c : s) {
        // continuation bytes all look like 10xxxxxx
        runes += ((c & 0xc0) != 0x80);
    }
    return runes;
}

// de_bom removes a leading UTF-8 byte-order mark, if present
void de_bom(string& s) {
    // compare clamps its range, so strings shorter than 3 bytes are safe
    if (s.compare(0, 3, "\xef\xbb\xbf") == 0) {
        s.erase(0, 3);
    }
}

// no_cr removes trailing carriage-returns, leaving any inner ones alone, as
// promised by the help message for tool `lines`
void no_cr(string& s) {
    while (!s.empty() && s.back() == '\r') {
        s.pop_back();
    }
}

// get_unix_line reads a line, ignoring any trailing carriage-returns in it
istream& get_unix_line(istream& src, string& dest) {
    getline(src, dest);
    no_cr(dest);
    return src;
}

// left_trim removes all leading whitespace, using the classic/C locale
void left_trim(string& s) {
    const auto start = find_if(s.begin(), s.end(), [](char c) {
        return !isspace<char>(c, locale::classic());
    });
    s.erase(s.begin(), start);
}

// matches_any checks if any of the regexes given matches part of the string
bool matches_any(const string& s, const vector<regex>& expressions) {
    // iterate by reference: copying regex objects is expensive
    return any_of(expressions.begin(), expressions.end(), [&s](const regex& e) {
        return regex_search(s, e);
    });
}

// remove_all keeps removing the first occurrence of the substring given until
// none is left, which includes occurrences formed by previous removals
void remove_all(string& dest, const char* ignore) {
    const auto len = strlen(ignore);
    if (len == 0) {
        // an empty substring is found everywhere, and removing it changes
        // nothing, which would loop forever
        return;
    }

    size_t from = 0;
    while (true) {
        const auto pos = dest.find(ignore, from);
        if (pos == string::npos) {
            return;
        }
        dest.erase(pos, len);
        // a removal can only create new matches overlapping the cut point,
        // so there's no need to restart searching from the very beginning
        from = (pos >= len - 1) ? pos - (len - 1) : 0;
    }
}

// right_trim removes all trailing whitespace, using the classic/C locale
void right_trim(string& s) {
    const auto stop = find_if(s.rbegin(), s.rend(), [](char c) {
        return !isspace<char>(c, locale::classic());
    });
    s.erase(stop.base(), s.end());
}

// show_error emits the message given as an ANSI-styled line on stderr
void show_error(const string& msg) {
    cerr << error_style << msg << "\x1b[0m" << endl;
}

// no_args_expected standardizes how tools complain about unexpected args
inline int no_args_expected() {
    show_error("no args expected");
    return 1;
}

/*
The next few funcs allow changing the auto-flushing/line-buffering stdout
behavior across tools.
*/

inline void output_line(const char* s) {
    cout << s << endl;
}

inline void output_line(uint64_t n) {
    cout << n << endl;
}

inline void output_line(const string& s) {
    cout << s << endl;
}

inline void output_styled_line(const string& style, const string& line) {
    cout << style << line << "\x1b[0m" << endl;
}

inline void end_output_line() {
    cout << endl;
}

// int_arg parses the first arg after the tool name as an integer, giving
// back the fallback when that arg is missing or isn't a valid number
static int64_t int_arg(int argc, char** argv, int64_t fallback) {
    if (argc > 1) {
        try {
            return stoll(argv[1]);
        } catch (...) {
            // deliberately best-effort: invalid numbers mean the default
        }
    }
    return fallback;
}

// copy_lines emits all remaining input lines as given, stopping early when
// the output can't be written to anymore
static int copy_lines() {
    string line;
    while (getline(cin, line)) {
        output_line(line);
        // failed writes set failbit/badbit on output streams, never eofbit
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// filter_lines is the logic shared by tools avoid, iavoid, match, and imatch:
// it only emits the lines whose (mis)match status is the one asked for
static int filter_lines(int argc, char** argv, bool case_sens, bool keep) {
    vector<regex> expressions;
    if (make_regexes(argc, argv, expressions, case_sens) > 0) {
        return 1;
    }

    string line;
    while (getline(cin, line)) {
        if (matches_any(line, expressions) != keep) {
            continue;
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// drop_matches is the logic shared by tools drop and idrop: it removes all
// matches of all the regexes given from each line, in the order given
static int drop_matches(int argc, char** argv, bool case_sens) {
    vector<regex> expressions;
    if (make_regexes(argc, argv, expressions, case_sens) > 0) {
        return 1;
    }

    string line;
    while (getline(cin, line)) {
        for (const regex& e : expressions) {
            // replacing is a no-op on lines without matches, so there's no
            // need to search for a match first
            line = regex_replace(line, e, "");
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// args emits each string given to it on its own output line, ignoring any
// input
int args(int argc, char** argv) {
    for (int i = 1; i < argc; i++) {
        output_line(argv[i]);
    }
    return 0;
}

// avoid ignores all lines case-sensitively matching any of the regexes given
int avoid(int argc, char** argv) {
    return filter_lines(argc, argv, true, false);
}

// begin emits the few strings given as their own lines, before emitting back
// all input lines
int begin(int argc, char** argv) {
    for (int i = 1; i < argc; i++) {
        output_line(argv[i]);
    }
    return copy_lines();
}

// begin_tsv emits a line of tab-separated values (TSV), before emitting back
// all input lines
int begin_tsv(int argc, char** argv) {
    for (int i = 1; i < argc; i++) {
        if (i > 1) {
            cout << '\t';
        }
        cout << argv[i];
    }
    if (argc > 1) {
        end_output_line();
    }

    return copy_lines();
}

// bh (breathe header) adds an extra empty line after the first/header line,
// and then adds a single extra empty line every few
int bh(int argc, char** argv) {
    const int64_t every = int_arg(argc, argv, default_breathe_period);

    string line;
    if (!getline(cin, line)) {
        return 0;
    }
    output_line(line);

    for (int64_t i = 0; getline(cin, line); i++) {
        if (every > 0 && i % every == 0) {
            end_output_line();
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// bl (breathe lines) adds single extra empty lines every few
int bl(int argc, char** argv) {
    const int64_t every = int_arg(argc, argv, default_breathe_period);

    string line;
    for (int64_t i = 0; getline(cin, line); i++) {
        if (every > 0 && i > 0 && i % every == 0) {
            end_output_line();
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// blow blows/expands tabs into runs of spaces; tabstops less than 1 leave
// lines as given
int blow(int argc, char** argv) {
    const int64_t tabstop = int_arg(argc, argv, default_tabstop);
    if (tabstop < 1) {
        return copy_lines();
    }

    string line;
    while (getline(cin, line)) {
        // column counts runes emitted so far, not bytes
        int64_t column = 0;

        for (const char c : line) {
            if (c != '\t') {
                column += ((c & 0xc0) != 0x80);
                cout << c;
                continue;
            }

            const int64_t spaces = tabstop - column % tabstop;
            column += spaces;
            cout << string(static_cast<size_t>(spaces), ' ');
        }

        end_output_line();
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// book lays out input lines on 2 columns, the same way books do it; its only
// arg is the page height, which counts the separator line between pages
int book(int argc, char** argv) {
    if (argc != 2) {
        show_error("expected 1 argument");
        return 1;
    }

    // parse as a signed value, so negative numbers are rejected below,
    // instead of turning into huge unsigned ones
    int64_t page_height = 0;
    try {
        page_height = stoll(argv[1]);
    } catch (...) {
        show_error("invalid page-height");
        return 1;
    }

    if (page_height < 2) {
        show_error("page height can't be less than 2");
        return 1;
    }

    string line;
    vector<string> rows;
    while (getline(cin, line)) {
        rows.push_back(line);
    }

    const uint64_t nrows = rows.size();
    const uint64_t inner = static_cast<uint64_t>(page_height) - 1;

    // find the max width for each side: chunks of lines alternate sides
    int side = 0;
    uint64_t widths[2] = {0, 0};

    for (uint64_t i = 0; i < nrows; i += inner) {
        for (uint64_t j = i; j < nrows && j < i + inner; j++) {
            widths[side] = max(widths[side], count_runes(rows[j]));
        }
        side = 1 - side;
    }

    const string none;

    for (uint64_t i = 0; i < nrows; i += 2 * inner) {
        if (i > 0) {
            // separate pages with a line as wide as both columns
            const uint64_t rest = (widths[1] > 0) ? (3 + widths[1]) : 2;
            const uint64_t maxw = widths[0] + rest;
            for (uint64_t k = 0; k < maxw; k++) {
                // cout << '-';
                cout << "·";
            }
            end_output_line();
        }

        for (uint64_t j = i; j < nrows && j < i + inner; j++) {
            const string& lhs = rows[j];
            const string& rhs = (j + inner < nrows) ? rows[j + inner] : none;

            cout << lhs;
            const uint64_t w = count_runes(lhs);
            if (widths[0] > w) {
                cout << string(static_cast<size_t>(widths[0] - w), ' ');
            }

            if (rhs.empty()) {
                // cout << " |";
                cout << " █";
            } else {
                // cout << " | ";
                cout << " █ ";
            }

            output_line(rhs);
            if (cout.fail()) {
                return 0;
            }
        }
    }

    return 0;
}

// chop_lf ignores the last line-feed from the input, if present
int chop_lf(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    for (bool started = false; getline(cin, line); started = true) {
        // end the previous line only when it turns out there's another one
        if (started) {
            end_output_line();
        }
        cout << line;
    }

    return 0;
}

// delay waits the given number of seconds before emitting back each line
// from the input
int delay(int argc, char** argv) {
    double seconds = 1.0;

    if (argc > 1) {
        try {
            seconds = stod(argv[1]);
        } catch (...) {
            // deliberately best-effort: invalid numbers mean the default
        }
    }

    if (isnan(seconds) || isinf(seconds)) {
        show_error("invalid number");
        return 1;
    }

    if (seconds < 0) {
        seconds = 0;
    }

    // cap delays to keep the millisecond count from overflowing
    const double max_seconds = 1e9;
    if (seconds > max_seconds) {
        seconds = max_seconds;
    }

    const chrono::milliseconds pause(static_cast<int64_t>(1000 * seconds));

    string line;
    while (getline(cin, line)) {
        this_thread::sleep_for(pause);

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// drop ignores all occurrences of all the regular expressions given, in the
// order given; regex matches are case-sensitive
int drop(int argc, char** argv) {
    return drop_matches(argc, argv, true);
}

// first limits input up to its first few lines
int first(int argc, char** argv) {
    const int64_t limit = int_arg(argc, argv, 1);

    // checking the count first avoids reading lines which won't be emitted
    string line;
    for (int64_t i = 0; i < limit && getline(cin, line); i++) {
        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// gbm (good, bad, meh) ANSI-styles/colors lines by regex-matching: each line
// is styled after the first of the (up to 3) regexes it matches, if any
int gbm(int argc, char** argv) {
    if (argc < 2) {
        show_error("at least 1 arg is required");
        return 1;
    }
    if (argc > 4) {
        show_error("only up to 3 args are supported");
        return 1;
    }

    vector<regex> expressions;
    if (make_regexes(argc, argv, expressions, true) > 0) {
        return 1;
    }

    // styles are in the same order as the regexes they go with
    const char* const styles[] = {green_style, red_style, gray_style};

    string line;
    while (getline(cin, line)) {
        const char* style = nullptr;
        for (size_t i = 0; i < expressions.size(); i++) {
            if (regex_search(line, expressions[i])) {
                style = styles[i];
                break;
            }
        }

        if (style != nullptr) {
            output_styled_line(style, line);
        } else {
            output_line(line);
        }

        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

extern map<string, string> tool_aliases;
extern map<string, int(*)(int, char**)> name2tool;

// help_general handles the no-arguments case for func help
int help_general(int, char**) {
    output_line("minibox [tool] [args...]");
    end_output_line();
    output_line("This is a busybox/toybox-like app which has several small");
    output_line("tools in it.");

    end_output_line();
    end_output_line();
    output_line("Aliases");
    end_output_line();

    // maps loop in key order, so there's no need to sort keys explicitly
    for (const auto& kv : tool_aliases) {
        cout << kv.first << '\t' << kv.second << endl;
    }

    end_output_line();
    end_output_line();
    output_line("Tools");

    for (const auto& kv : name2tool) {
        end_output_line();
        end_output_line();

        // tools without a help message show up as empty lines
        const auto msg = name2help.find(kv.first);
        output_line(msg != name2help.end() ? msg->second : string());
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// help shows the help messages for all the tool names/aliases given, or the
// general help message when given no names; the result is 1 when any of the
// names given has no help message
int help(int argc, char** argv) {
    if (argc <= 1) {
        return help_general(argc, argv);
    }

    int errors = 0;
    int shown = 0;

    for (int i = 1; i < argc; i++) {
        // normalize names and resolve aliases the same way func main does
        string name = argv[i];
        name.erase(std::remove(name.begin(), name.end(), '-'), name.end());
        name.erase(std::remove(name.begin(), name.end(), '_'), name.end());

        const auto alias = tool_aliases.find(name);
        if (alias != tool_aliases.end()) {
            name = alias->second;
        }

        const auto msg = name2help.find(name);
        if (msg == name2help.end()) {
            show_error(string(argv[i]) + ": no help message found");
            errors++;
            continue;
        }

        // only separate messages actually shown
        if (shown > 0) {
            end_output_line();
            end_output_line();
        }
        output_line(msg->second);
        shown++;
    }

    return (errors > 0) ? 1 : 0;
}

// iavoid ignores all lines case-insensitively matching any of the regexes
// given
int iavoid(int argc, char** argv) {
    return filter_lines(argc, argv, false, false);
}

// idrop ignores all case-insensitively-matched occurrences of all the regular
// expressions given, in the order given
int idrop(int argc, char** argv) {
    return drop_matches(argc, argv, false);
}

// imatch only keeps lines case-insensitively matching any of the regexes
// given; when given no regexes, nothing is read or emitted
int imatch(int argc, char** argv) {
    if (argc == 1) {
        return 0;
    }
    return filter_lines(argc, argv, false, true);
}

// last limits input up to its last few lines
int last(int argc, char** argv) {
    const int64_t limit = int_arg(argc, argv, 1);
    if (limit < 1) {
        return 0;
    }

    // ring is a circular buffer with the latest lines, where slot is always
    // where the next line goes, which is also the oldest line when full
    string line;
    vector<string> ring;
    size_t slot = 0;

    while (getline(cin, line)) {
        if (static_cast<int64_t>(ring.size()) < limit) {
            ring.push_back(line);
        } else {
            ring[slot] = line;
        }
        slot = (slot + 1) % static_cast<size_t>(limit);
    }

    for (size_t k = 0; k < ring.size(); k++) {
        output_line(ring[(slot + k) % ring.size()]);
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// leak helps debug pipes, by copying all input lines both to stderr and to
// stdout
int leak(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    while (getline(cin, line)) {
        cerr << line << endl;
        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// lines ignores trailing carriage-returns from input lines, and ensures the
// last line ends with a line-feed, whether the input had that or not; a
// leading UTF-8 byte-order mark on the first line is ignored as well
int lines(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    for (bool started = false; get_unix_line(cin, line); started = true) {
        if (!started) {
            de_bom(line);
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }
    return 0;
}

// line_up joins input lines via tabs, up to the number given: whenever that
// number is exceeded, a new output line starts; when not given a number, or
// when that number is 0 or negative, all input lines are tab-joined into a
// single output line
int line_up(int argc, char** argv) {
    const int64_t limit = int_arg(argc, argv, 0);

    string line;
    int64_t items = 0;

    while (getline(cin, line)) {
        if (limit > 0 && items >= limit) {
            items = 0;
            end_output_line();
        } else if (items > 0) {
            cout << '\t';
        }

        cout << line;
        if (cout.fail()) {
            return 0;
        }
        items++;
    }

    if (items > 0) {
        end_output_line();
    }
    return 0;
}

// links gets all hyperlink-type substrings from the input, each match shown
// on its own output line
int links(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    const regex pat("https?://[A-Za-z0-9+_.:%-]+(/[A-Za-z0-9+_.%/,#?&=-]*)*");
    const sregex_iterator none;

    while (getline(cin, line)) {
        for (sregex_iterator it(line.begin(), line.end(), pat); it != none; ++it) {
            output_line(it->str());
            if (cout.fail()) {
                return 0;
            }
        }
    }

    return 0;
}

// lower ASCII-lowercases all symbols in all lines
int lower(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    while (getline(cin, line)) {
        // only touch ASCII uppercase letters: giving func tolower negative
        // chars, such as UTF-8 bytes on many platforms, is undefined behavior
        for (char& c : line) {
            if ('A' <= c && c <= 'Z') {
                c = static_cast<char>(c - 'A' + 'a');
            }
        }

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// match only keeps lines case-sensitively matching any of the regexes given;
// when given no regexes, nothing is read or emitted
int match(int argc, char** argv) {
    if (argc == 1) {
        return 0;
    }
    return filter_lines(argc, argv, true, true);
}

// n numbers lines using the optional starting counter given, which is 1 by
// default; each output line starts with the current counter, followed by a
// tab, ending with the original input line
int n(int argc, char** argv) {
    int64_t counter = int_arg(argc, argv, 1);

    string line;
    while (getline(cin, line)) {
        cout << counter << '\t' << line << endl;
        if (cout.fail()) {
            return 0;
        }
        counter++;
    }

    return 0;
}

// style_digits helps func nn do its job, by emitting the digits given in
// groups of 3, counting from the end, with every other group ANSI-styled
void style_digits(const string& digits) {
    const auto len = digits.length();
    if (len < 4) {
        cout << digits;
        return;
    }

    bool styled = false;
    // start counting so multiples of 3 line up with the end of the digits
    size_t count = 3 - (len % 3);

    for (const char c : digits) {
        cout << c;
        count++;
        if (count % 3 == 0) {
            styled = !styled;
            cout << (styled ? gray_style : "\x1b[0m");
        }
    }

    if (styled) {
        cout << "\x1b[0m";
    }
}

// nn stands for `nice numbers`, alternating ANSI-styles for long-enough runs
// of digits, the result being easier to read/parse visually, especially on
// output full of such long numbers, such as with tables
int nn(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    // digits has the current run of digits, which is empty between runs
    string digits;

    while (getline(cin, line)) {
        for (const char c : line) {
            if ('0' <= c && c <= '9') {
                digits += c;
                continue;
            }

            if (!digits.empty()) {
                style_digits(digits);
                digits.clear();
            }
            cout << c;
        }

        // don't forget runs of digits ending their lines
        if (!digits.empty()) {
            style_digits(digits);
            digits.clear();
        }

        end_output_line();
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// nothing reads nothing and writes nothing, effectively doing nothing
int nothing(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }
    return 0;
}

// plain ignores all ANSI-style sequences, leaving actual `plain` plain-text;
// escape bytes which don't start such sequences are kept
int plain(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    // text is for normal bytes, escape is for right after an escape byte,
    // and csi is for being inside a sequence started by an escape and a `[`
    enum class State { text, escape, csi };

    string line;

    while (getline(cin, line)) {
        State state = State::text;

        for (const char c : line) {
            switch (state) {
            case State::text:
                if (c == '\x1b') {
                    state = State::escape;
                } else {
                    cout << c;
                }
                break;

            case State::escape:
                if (c == '[') {
                    state = State::csi;
                } else if (c == '\x1b') {
                    // emit the previous escape: the new one may still start
                    // a sequence
                    cout << '\x1b';
                } else {
                    // not a sequence after all: emit what was held back and
                    // go back to normal text
                    cout << '\x1b' << c;
                    state = State::text;
                }
                break;

            case State::csi:
                // sequences end with a `final byte` between 0x40 and 0x7e,
                // which includes all ASCII letters
                if ('\x40' <= c && c <= '\x7e') {
                    state = State::text;
                }
                break;
            }
        }

        // a lone escape ending a line isn't part of any sequence
        if (state == State::escape) {
            cout << '\x1b';
        }

        end_output_line();
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

// is_odd_prime checks odd numbers starting from 3 for primality, using trial
// division by odd numbers up to the square root
static bool is_odd_prime(uint64_t num) {
    // dividing avoids both overflows and floating-point square roots
    for (uint64_t div = 3; div <= num / div; div += 2) {
        if (num % div == 0) {
            return false;
        }
    }
    return true;
}

// primes shows the first few prime numbers, one per output line
int primes(int argc, char** argv) {
    // parse as a signed value, so negative counts mean no output, instead
    // of turning into huge unsigned counts
    int64_t remaining = int_arg(argc, argv, 1000000);
    if (remaining < 1) {
        return 0;
    }

    // 2 is the only even prime, so the loop below can stick to odd numbers
    output_line(static_cast<uint64_t>(2));
    remaining--;

    for (uint64_t candidate = 3; remaining > 0; candidate += 2) {
        if (!is_odd_prime(candidate)) {
            continue;
        }

        output_line(candidate);
        if (cout.fail()) {
            return 0;
        }
        remaining--;
    }

    return 0;
}

// skip ignores up to the given number of input lines, then emitting the rest
int skip(int argc, char** argv) {
    const int64_t unwanted = int_arg(argc, argv, 1);

    string line;
    for (int64_t i = 0; i < unwanted; i++) {
        if (!getline(cin, line)) {
            return 0;
        }
    }

    return copy_lines();
}

// skip_last emits all but the last few input lines
int skip_last(int argc, char** argv) {
    const int64_t limit = int_arg(argc, argv, 1);
    if (limit < 1) {
        return copy_lines();
    }

    // ring is a circular buffer holding back the latest lines: lines are
    // only emitted when newer ones are about to replace them
    string line;
    vector<string> ring;
    size_t slot = 0;

    while (getline(cin, line)) {
        if (static_cast<int64_t>(ring.size()) < limit) {
            ring.push_back(line);
        } else {
            output_line(ring[slot]);
            if (cout.fail()) {
                return 0;
            }
            ring[slot] = line;
        }

        slot = (slot + 1) % static_cast<size_t>(limit);
    }

    return 0;
}

// size counts all input bytes
int size(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    // binary mode avoids byte-altering line-ending conversions on systems
    // which do that; failing to reopen a stream also closes it
    if (freopen(nullptr, "rb", stdin) == nullptr) {
        show_error("can't read standard input in binary mode");
        return 1;
    }

    uint64_t total = 0;
    char buf[64 * 1024];

    while (true) {
        const size_t got = fread(buf, 1, sizeof(buf), stdin);
        if (got == 0) {
            break;
        }
        total += got;
    }

    output_line(total);
    return 0;
}

// teletype is meant to simulate the cadence of old teletype machines
// NOTE(review): this looks unfinished, as it only emits an empty line for
// each input line, never emitting any of their content; it also isn't one
// of the tools registered in name2tool
int teletype(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    while (getline(cin, line)) {
        end_output_line();
    }

    return 0;
}

// trim ignores leading/trailing spaces in all lines
int trim(int argc, char**) {
    if (argc > 1) {
        return no_args_expected();
    }

    string line;
    while (getline(cin, line)) {
        // no_cr(line);
        left_trim(line);
        right_trim(line);

        output_line(line);
        if (cout.fail()) {
            return 0;
        }
    }

    return 0;
}

1356 // trim_end ignores trailing spaces in all lines 1357 int trim_end(int argc, char**) { 1358 if (argc > 1) { 1359 return no_args_expected(); 1360 } 1361 1362 string line; 1363 while (getline(cin, line)) { 1364 // no_cr(line); 1365 right_trim(line); 1366 1367 output_line(line); 1368 if (cout.eof()) { 1369 return 0; 1370 } 1371 } 1372 1373 return 0; 1374 } 1375 1376 // unique avoids emitting the same input line more than once 1377 int unique(int argc, char**) { 1378 if (argc > 1) { 1379 return no_args_expected(); 1380 } 1381 1382 string line; 1383 set<string> seen; 1384 1385 while (getline(cin, line)) { 1386 if (seen.find(line) != seen.end()) { 1387 continue; 1388 } 1389 1390 output_line(line); 1391 if (cout.eof()) { 1392 return 0; 1393 } 1394 seen.insert(line); 1395 } 1396 1397 return 0; 1398 } 1399 1400 map<string, string> tool_aliases = { 1401 {"arguments", "args"}, 1402 {"blowtabs", "blow"}, 1403 {"breathe", "bl"}, 1404 {"breatheheader", "bh"}, 1405 {"breathelines", "bl"}, 1406 {"butlast", "skiplast"}, 1407 {"catl", "lines"}, 1408 {"countbytes", "size"}, 1409 {"dedup", "unique"}, 1410 {"dropall", "drop"}, 1411 {"erase", "drop"}, 1412 {"eraseall", "drop"}, 1413 {"expand", "blow"}, 1414 {"expandtabs", "blow"}, 1415 {"final", "last"}, 1416 {"idropall", "idrop"}, 1417 {"ierase", "idrop"}, 1418 {"ieraseall", "idrop"}, 1419 {"nil", "nothing"}, 1420 {"null", "nothing"}, 1421 {"pretsv", "begintsv"}, 1422 {"rstrip", "trimend"}, 1423 {"rtrim", "trimend"}, 1424 {"skipfinal", "skiplast"}, 1425 {"strip", "trim"}, 1426 {"stripend", "trimend"}, 1427 {"stripspace", "trim"}, 1428 {"stripspaces", "trim"}, 1429 {"striptrail", "trimend"}, 1430 {"striptrails", "trimend"}, 1431 {"trimspace", "trim"}, 1432 {"trimspaces", "trim"}, 1433 {"trimtrail", "trimend"}, 1434 {"trimtrails", "trimend"}, 1435 {"unixify", "lines"}, 1436 }; 1437 1438 map<string, int(*)(int, char**)> name2tool = { 1439 {"args", args}, 1440 {"avoid", avoid}, 1441 {"begin", begin}, 1442 {"begintsv", begin_tsv}, 1443 
{"bh", bh}, 1444 {"bl", bl}, 1445 {"blow", blow}, 1446 {"book", book}, 1447 {"choplf", chop_lf}, 1448 {"delay", delay}, 1449 {"drop", drop}, 1450 {"first", first}, 1451 {"gbm", gbm}, 1452 {"help", help}, 1453 {"iavoid", iavoid}, 1454 {"idrop", idrop}, 1455 {"imatch", imatch}, 1456 {"last", last}, 1457 {"leak", leak}, 1458 {"lines", lines}, 1459 {"lineup", line_up}, 1460 {"links", links}, 1461 {"lower", lower}, 1462 {"match", match}, 1463 {"n", n}, 1464 {"nn", nn}, 1465 {"nothing", nothing}, 1466 {"plain", plain}, 1467 {"primes", primes}, 1468 {"skip", skip}, 1469 {"skiplast", skip_last}, 1470 {"size", size}, 1471 {"trim", trim}, 1472 {"trimend", trim_end}, 1473 {"unique", unique}, 1474 }; 1475 1476 int main(int argc, char** argv) { 1477 cin.tie(NULL); 1478 ios_base::sync_with_stdio(false); 1479 1480 argc--; 1481 argv++; 1482 1483 if (argc < 1) { 1484 help(argc, argv); 1485 return 0; 1486 } 1487 1488 string key; 1489 string name = argv[0]; 1490 key.append(name); 1491 key.erase(remove(key.begin(), key.end(), '-'), key.end()); 1492 key.erase(remove(key.begin(), key.end(), '_'), key.end()); 1493 1494 if (tool_aliases.find(name) != tool_aliases.end()) { 1495 key = tool_aliases[name]; 1496 } 1497 if (name2tool.find(key) == name2tool.end()) { 1498 stringstream msg; 1499 msg << name << ": no such tool"; 1500 show_error(msg.str()); 1501 return 1; 1502 } 1503 1504 try { 1505 auto tool = name2tool[key]; 1506 return tool(argc, argv); 1507 } catch (...) { 1508 show_error("generic error"); 1509 return 1; 1510 } 1511 }