File: bitdump.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -o ./bitdump ./bitdump.c
  29 
  30 Building with TSV defined makes `bitdump` emit lines of tab-separated values,
  31 instead of space-separated values. You can do that by running
  32 
  33 cc -Wall -s -O2 -D TSV -o ./bitdump ./bitdump.c
  34 */
  35 
  36 #include <stdbool.h>
  37 #include <stdio.h>
  38 #include <string.h>
  39 
  40 #ifdef _WIN32
  41 #include <fcntl.h>
  42 #include <windows.h>
  43 #endif
  44 
  45 // info is the multi-line help message
  46 const char* info = ""
  47 "bitdump [options...] [filenames...]\n"
  48 "\n"
  49 "Show all bits for all input bytes, starting each output line with the\n"
  50 "leading byte's offset.\n"
  51 "\n"
  52 "\n"
  53 "Options\n"
  54 "\n"
  55 "    -h, --h            show this help message\n"
  56 "    -help, --help      aliases for option -h\n"
  57 "";
  58 
  59 // ITEM_SEP is the item separator inserted between all values in output lines
  60 #ifdef TSV
  61 #define ITEM_SEP '\t'
  62 #else
  63 #define ITEM_SEP ' '
  64 #endif
  65 
  66 // EMIT_CONST abstracts emitting string constants without their final null byte
  67 #define EMIT_CONST(w, x) fwrite(x, sizeof(x) - 1, 1, w)
  68 
  69 void write_bytes(FILE* w, const unsigned char* src, size_t len) {
  70     fwrite(src, len, 1, w);
  71 }
  72 
// lookup holds the 8 ASCII binary digits for each of the 256 possible byte
// values, back to back and in increasing order: the digits for byte value b
// start at index 8 * b. The initializer is exactly 256 * 8 chars long, so the
// array has no trailing null byte and must never be handled as a C string.
//
// NOTE(review): the command below presumably is what generated the table's
// lines; the tools it names aren't part of this file.
/*
tlp = '(f"{bin(v)[2:]:>08}" for v in range(256))' | lineup 8 | gsub '\t' |
    tlp 'f"    \"{l}\""'
*/
const unsigned char lookup[256 * 8] = ""
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111"
    "";
 111 
 112 // write_bin is faster than calling fprintf(w, "%08b", b): this matters
 113 // because it's called for every input byte
 114 void write_bin(FILE* w, unsigned char b) {
 115     const void* ptr = &lookup[8 * b];
 116     fwrite(ptr, 8, 1, w);
 117 }
 118 
 119 void write_decimal_uint(FILE* w, size_t n) {
 120     if (n < 1) {
 121         EMIT_CONST(w, "00000000");
 122         return;
 123     }
 124 
 125     size_t digits;
 126     // 20 is the most digits unsigned 64-bit ints can ever need
 127     unsigned char buf[24];
 128     for (digits = 0; n > 0; digits++, n /= 10) {
 129         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 130     }
 131 
 132     // left-pad the coming digits up to 8 chars
 133     if (digits < 8) {
 134         write_bytes(w, (unsigned char*)"00000000", 8 - digits);
 135     }
 136 
 137     // emit all digits
 138     const unsigned char* start = buf + sizeof(buf) - digits;
 139     write_bytes(w, start, digits);
 140 }
 141 
 142 void write_hex_uint(FILE* w, size_t n) {
 143     if (n < 1) {
 144         EMIT_CONST(w, "00000000");
 145         return;
 146     }
 147 
 148     size_t digits;
 149     // 20 is the most digits unsigned 64-bit ints can ever need
 150     unsigned char buf[24];
 151     for (digits = 0; n > 0; digits += 2, n /= 256) {
 152         unsigned char b = n % 256;
 153         const char* hex_digits = "0123456789abcdef";
 154         buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
 155         buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
 156     }
 157 
 158     // left-pad the coming digits up to 8 chars
 159     if (digits < 8) {
 160         write_bytes(w, (unsigned char*)"00000000", 8 - digits);
 161     }
 162 
 163     // emit all digits
 164     const unsigned char* start = buf + sizeof(buf) - digits;
 165     write_bytes(w, start, digits);
 166 }
 167 
 168 void decimal_offset(FILE* w, size_t offset) {
 169     write_decimal_uint(w, offset);
 170     putc(ITEM_SEP, w);
 171 }
 172 
 173 void hexadecimal_offset(FILE* w, size_t offset) {
 174     write_hex_uint(w, offset);
 175     putc(ITEM_SEP, w);
 176 }
 177 
 178 void no_offset(FILE*, size_t) {
 179 }
 180 
 181 // handle_reader shows all bytes read from the source given as colored hex
 182 // values, showing offsets and ASCII symbols on the sides of each output line
 183 void handle_reader(FILE* w, FILE* src, void (*start_row)(FILE*, size_t)) {
 184     const size_t bufcap = 32 * 1024;
 185     unsigned char buf[bufcap];
 186     size_t offset = 0;
 187 
 188     while (!feof(w)) {
 189         const size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 190         if (len < 1) {
 191             // assume input is over when no bytes were read
 192             if (offset > 0) {
 193                 putc('\n', w);
 194             }
 195             break;
 196         }
 197 
 198         for (size_t i = 0; i < len; i++, offset++) {
 199             const size_t rem = offset % 8;
 200             if (rem == 0) {
 201                 if (offset > 0) {
 202                     putc('\n', w);
 203                 }
 204                 start_row(w, offset);
 205             } else {
 206                 putc(ITEM_SEP, w);
 207             }
 208             write_bin(w, buf[i]);
 209         }
 210     }
 211 }
 212 
 213 // handle_file handles data from the filename given; returns false only when
 214 // the file can't be opened
 215 bool handle_file(FILE* w, const char* path, void (*start_row)(FILE*, size_t)) {
 216     FILE* f = fopen(path, "rb");
 217     if (f == NULL) {
 218         // ensure currently-buffered/deferred output shows up right now: not
 219         // doing so may scramble results in the common case where stdout and
 220         // stderr are the same, thus confusing users
 221         putc('\n', w);
 222 
 223         fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
 224         return false;
 225     }
 226 
 227     handle_reader(w, f, start_row);
 228 
 229     fclose(f);
 230     return true;
 231 }
 232 
 233 // is_help_option simplifies control-flow for func run
 234 bool is_help_option(const char* s) {
 235     return (s[0] == '-') && (
 236         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 237         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 238     );
 239 }
 240 
 241 // run returns the number of errors
 242 int run(int argc, char** argv, FILE* w) {
 243     size_t files = 0;
 244     size_t errors = 0;
 245     const void (*start_row)() = decimal_offset;
 246 
 247     // handle all filenames/options given
 248     for (size_t i = 1; i < argc && !feof(w); i++) {
 249         // a `-` filename stands for the standard input
 250         if (argv[i][0] == '-' && argv[i][1] == 0) {
 251             handle_reader(w, stdin, start_row);
 252             continue;
 253         }
 254 
 255         if (is_help_option(argv[i])) {
 256             // help option quits the app right away
 257             fprintf(stderr, "%s", info);
 258             return 0;
 259         }
 260 
 261         if (files > 0) {
 262             // put an empty line between adjacent outputs
 263             putc('\n', w);
 264         }
 265 
 266         if (!handle_file(w, argv[i], start_row)) {
 267             errors++;
 268         }
 269         files++;
 270     }
 271 
 272     // no filenames means use stdin as the only input
 273     if (files == 0) {
 274         handle_reader(w, stdin, start_row);
 275     }
 276 
 277     return errors;
 278 }
 279 
 280 int main(int argc, char** argv) {
 281 #ifdef _WIN32
 282     setmode(fileno(stdin), O_BINARY);
 283     // ensure output lines end in LF instead of CRLF on windows
 284     setmode(fileno(stdout), O_BINARY);
 285     setmode(fileno(stderr), O_BINARY);
 286 #endif
 287 
 288     return run(argc, argv, stdout) == 0 ? 0 : 1;
 289 }