File: bitdump.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./bitdump ./bitdump.c
  29 
  30 Building with TSV defined makes `bitdump` emit lines of tab-separated values,
  31 instead of space-separated values. You can do that by running
  32 
  33 cc -Wall -s -O2 -D TSV -o ./bitdump ./bitdump.c
  34 */
  35 
  36 #include <fcntl.h>
  37 #include <stdbool.h>
  38 #include <stdio.h>
  39 #include <string.h>
  40 #include <sys/stat.h>
  41 
  42 #ifdef _WIN32
  43 #include <windows.h>
  44 #endif
  45 
// info is the multi-line help message: func run writes it verbatim to stderr
// when any of the help options is given; the leading and trailing empty
// string literals only keep all the message lines visually aligned
const char* info = ""
"bitdump [options...] [filenames...]\n"
"\n"
"Show all bits for all input bytes, starting each output line with the\n"
"leading byte's offset.\n"
"\n"
"\n"
"Options\n"
"\n"
"    -h, --h            show this help message\n"
"    -help, --help      aliases for option -h\n"
"";
  59 
  60 // ITEM_SEP is the item separator inserted between all values in output lines
  61 #ifdef TSV
  62 #define ITEM_SEP '\t'
  63 #else
  64 #define ITEM_SEP ' '
  65 #endif
  66 
  67 // EMIT_CONST abstracts emitting string constants without their final null byte
  68 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  69 
  70 inline void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  71     fwrite(src, len, 1, w);
  72 }
  73 
  74 /*
  75 tlp = '(f"{bin(v)[2:]:>08}" for v in range(256))' | lineup 8 | gsub '\t' |
  76     tlp 'f"    \"{l}\""'
  77 */
// lookup holds the 8-character binary text for each of the 256 byte values,
// back to back in increasing order: the digits for byte value v start at
// index 8 * v; the array is exactly 256 * 8 bytes long, so the terminating
// null byte of the string literal isn't stored
const unsigned char lookup[256 * 8] = ""
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111"
    "";
 112 
 113 // write_bin is faster than calling fprintf(w, "%08b", b): this matters
 114 // because it's called for every input byte
 115 inline void write_bin(FILE* w, unsigned char b) {
 116     const void* ptr = &lookup[8 * b];
 117     fwrite(ptr, 8, 1, w);
 118 }
 119 
 120 void write_decimal_uint(FILE* w, size_t n) {
 121     if (n < 1) {
 122         EMIT_CONST(w, "00000000");
 123         return;
 124     }
 125 
 126     size_t digits;
 127     // 20 is the most digits unsigned 64-bit ints can ever need
 128     unsigned char buf[24];
 129     for (digits = 0; n > 0; digits++, n /= 10) {
 130         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 131     }
 132 
 133     // left-pad the coming digits up to 8 chars
 134     if (digits < 8) {
 135         write_bytes(w, (unsigned char*)"00000000", 8 - digits);
 136     }
 137 
 138     // emit all digits
 139     unsigned char* start = buf + sizeof(buf) - digits;
 140     write_bytes(w, start, digits);
 141 }
 142 
 143 void write_hex_uint(FILE* w, size_t n) {
 144     if (n < 1) {
 145         EMIT_CONST(w, "00000000");
 146         return;
 147     }
 148 
 149     size_t digits;
 150     // 20 is the most digits unsigned 64-bit ints can ever need
 151     unsigned char buf[24];
 152     for (digits = 0; n > 0; digits += 2, n /= 256) {
 153         unsigned char b = n % 256;
 154         const char* hex_digits = "0123456789abcdef";
 155         buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
 156         buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
 157     }
 158 
 159     // left-pad the coming digits up to 8 chars
 160     if (digits < 8) {
 161         write_bytes(w, (unsigned char*)"00000000", 8 - digits);
 162     }
 163 
 164     // emit all digits
 165     unsigned char* start = buf + sizeof(buf) - digits;
 166     write_bytes(w, start, digits);
 167 }
 168 
 169 void decimal_offset(FILE* w, size_t offset) {
 170     write_decimal_uint(w, offset);
 171     putc(ITEM_SEP, w);
 172 }
 173 
 174 void hexadecimal_offset(FILE* w, size_t offset) {
 175     write_hex_uint(w, offset);
 176     putc(ITEM_SEP, w);
 177 }
 178 
 179 void no_offset(FILE*, size_t) {
 180 }
 181 
 182 // handle_reader shows all bytes read from the source given as colored hex
 183 // values, showing offsets and ASCII symbols on the sides of each output line
 184 void handle_reader(FILE* w, FILE* src, void (*start_row)(FILE*, size_t)) {
 185     const size_t bufcap = 32 * 1024;
 186     unsigned char buf[bufcap];
 187     size_t offset = 0;
 188 
 189     while (!feof(w)) {
 190         const size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 191         if (len < 1) {
 192             // assume input is over when no bytes were read
 193             if (offset > 0) {
 194                 putc('\n', w);
 195             }
 196             break;
 197         }
 198 
 199         for (size_t i = 0; i < len; i++, offset++) {
 200             const size_t rem = offset % 8;
 201             if (rem == 0) {
 202                 if (offset > 0) {
 203                     putc('\n', w);
 204                 }
 205                 start_row(w, offset);
 206             } else {
 207                 putc(ITEM_SEP, w);
 208             }
 209             write_bin(w, buf[i]);
 210         }
 211     }
 212 }
 213 
 214 // handle_file handles data from the filename given; returns false only when
 215 // the file can't be opened
 216 bool handle_file(FILE* w, const char* path, void (*start_row)(FILE*, size_t)) {
 217     FILE* f = fopen(path, "rb");
 218     if (f == NULL) {
 219         // ensure currently-buffered/deferred output shows up right now: not
 220         // doing so may scramble results in the common case where stdout and
 221         // stderr are the same, thus confusing users
 222         putc('\n', w);
 223 
 224         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 225         return false;
 226     }
 227 
 228     handle_reader(w, f, start_row);
 229 
 230     fclose(f);
 231     return true;
 232 }
 233 
 234 // is_help_option simplifies control-flow for func run
 235 bool is_help_option(char* s) {
 236     return (s[0] == '-') && (
 237         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 238         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 239     );
 240 }
 241 
 242 // run returns the number of errors
 243 int run(int argc, char** argv, FILE* w) {
 244     size_t files = 0;
 245     size_t errors = 0;
 246     const void (*start_row)() = decimal_offset;
 247 
 248     // handle all filenames/options given
 249     for (size_t i = 1; i < argc && !feof(w); i++) {
 250         // a `-` filename stands for the standard input
 251         if (argv[i][0] == '-' && argv[i][1] == 0) {
 252             handle_reader(w, stdin, start_row);
 253             continue;
 254         }
 255 
 256         if (is_help_option(argv[i])) {
 257             // help option quits the app right away
 258             fprintf(stderr, "%s", info);
 259             return 0;
 260         }
 261 
 262         if (files > 0) {
 263             // put an empty line between adjacent outputs
 264             putc('\n', w);
 265         }
 266 
 267         if (!handle_file(w, argv[i], start_row)) {
 268             errors++;
 269         }
 270         files++;
 271     }
 272 
 273     // no filenames means use stdin as the only input
 274     if (files == 0) {
 275         handle_reader(w, stdin, start_row);
 276     }
 277 
 278     return errors;
 279 }
 280 
 281 int main(int argc, char** argv) {
 282 #ifdef _WIN32
 283     setmode(fileno(stdin), O_BINARY);
 284     // ensure output lines end in LF instead of CRLF on windows
 285     setmode(fileno(stdout), O_BINARY);
 286     setmode(fileno(stderr), O_BINARY);
 287 #endif
 288 
 289     return run(argc, argv, stdout) == 0 ? 0 : 1;
 290 }