File: bitdump.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O2 -flto -o ./bitdump ./bitdump.c
  29 
  30 Building with TSV defined makes `bitdump` emit lines of tab-separated values,
  31 instead of space-separated values. You can do that by running
  32 
  33 cc -Wall -s -O2 -flto -D TSV -o ./bitdump ./bitdump.c
  34 */
  35 
  36 #include <stdbool.h>
  37 #include <stdio.h>
  38 #include <string.h>
  39 
  40 #ifdef _WIN32
  41 #include <fcntl.h>
  42 #include <windows.h>
  43 #endif
  44 
  45 #ifdef RED_ERRORS
  46 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  47 #ifdef __APPLE__
  48 #define ERROR_STYLE "\x1b[31m"
  49 #endif
  50 #define RESET_STYLE "\x1b[0m"
  51 #else
  52 #define ERROR_STYLE
  53 #define RESET_STYLE
  54 #endif
  55 
  56 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  57 
  58 // ITEM_SEP is the item separator inserted between all values in output lines
  59 #ifdef TSV
  60 #define ITEM_SEP '\t'
  61 #else
  62 #define ITEM_SEP ' '
  63 #endif
  64 
  65 // EMIT_CONST emits string constants without their final null byte
  66 #define EMIT_CONST(w, x) fwrite(x, 1, sizeof(x) - 1, w)
  67 
  68 const char* info = ""
  69 "bitdump [options...] [filenames...]\n"
  70 "\n"
  71 "Show all bits for all input bytes, starting each output line with the\n"
  72 "leading byte's offset.\n"
  73 "\n"
  74 "\n"
  75 "Options\n"
  76 "\n"
  77 "    -h, --h            show this help message\n"
  78 "    -help, --help      aliases for option -h\n"
  79 "\n"
  80 "    -no-offset, --no-offset      don't start lines with current byte offsets\n"
  81 "    -no-offsets, --no-offsets    aliases for option -no-offset\n"
  82 "";
  83 
/*
lookup holds the 8 ASCII binary digits for each of the 256 possible byte
values, back to back and in increasing order: the digits for byte value b
are the 8 bytes starting at index 8 * b. The array is exactly as long as its
digits, so it has no trailing null byte and can't be used as a C string.

The shell pipeline below is presumably what generated the table's lines:

tlp = '(f"{bin(v)[2:]:>08}" for v in range(256))' | lineup 8 | gsub '\t' |
    tlp 'f"    \"{l}\""'
*/
const unsigned char lookup[256 * 8] = ""
    // each line below covers 8 consecutive byte values
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111"
    "";
 122 
 123 // write_bin is faster than calling fprintf(w, "%08b", b): this matters
 124 // because it's called for every input byte
 125 static inline void write_bin(FILE* w, unsigned char b) {
 126     const void* ptr = &lookup[8 * b];
 127     fwrite(ptr, 1, 8, w);
 128 }
 129 
 130 void write_decimal_uint(FILE* w, size_t n) {
 131     if (n < 1) {
 132         EMIT_CONST(w, "00000000");
 133         return;
 134     }
 135 
 136     size_t digits;
 137     // 20 is the most digits unsigned 64-bit ints can ever need
 138     unsigned char buf[24];
 139     for (digits = 0; n > 0; digits++, n /= 10) {
 140         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 141     }
 142 
 143     // left-pad the coming digits up to 8 chars
 144     if (digits < 8) {
 145         fwrite((unsigned char*)"00000000", 1, 8 - digits, w);
 146     }
 147 
 148     // emit all digits
 149     const unsigned char* start = buf + sizeof(buf) - digits;
 150     fwrite(start, 1, digits, w);
 151 }
 152 
 153 void write_hex_uint(FILE* w, size_t n) {
 154     if (n < 1) {
 155         EMIT_CONST(w, "00000000");
 156         return;
 157     }
 158 
 159     size_t digits;
 160     // 20 is the most digits unsigned 64-bit ints can ever need
 161     unsigned char buf[24];
 162     for (digits = 0; n > 0; digits += 2, n /= 256) {
 163         unsigned char b = n % 256;
 164         const char* hex_digits = "0123456789abcdef";
 165         buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
 166         buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
 167     }
 168 
 169     // left-pad the coming digits up to 8 chars
 170     if (digits < 8) {
 171         fwrite((unsigned char*)"00000000", 1, 8 - digits, w);
 172     }
 173 
 174     // emit all digits
 175     const unsigned char* start = buf + sizeof(buf) - digits;
 176     fwrite(start, 1, digits, w);
 177 }
 178 
 179 void decimal_offset(FILE* w, size_t offset) {
 180     write_decimal_uint(w, offset);
 181     fputc(ITEM_SEP, w);
 182 }
 183 
 184 void hexadecimal_offset(FILE* w, size_t offset) {
 185     write_hex_uint(w, offset);
 186     fputc(ITEM_SEP, w);
 187 }
 188 
 189 void no_offset(FILE*, size_t) {
 190     // do nothing on purpose
 191 }
 192 
 193 // handle_reader shows all bytes read from the source given as colored hex
 194 // values, showing offsets and ASCII symbols on the sides of each output line
 195 void handle_reader(FILE* w, FILE* src, void (*start_row)(FILE*, size_t)) {
 196     const size_t bufcap = 32 * 1024;
 197     unsigned char buf[bufcap];
 198     size_t offset = 0;
 199 
 200     while (!feof(w)) {
 201         const size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 202         if (len < 1) {
 203             // assume input is over when no bytes were read
 204             if (offset > 0) {
 205                 fputc('\n', w);
 206             }
 207             break;
 208         }
 209 
 210         for (size_t i = 0; i < len; i++, offset++) {
 211             const size_t rem = offset % 8;
 212             if (rem == 0) {
 213                 if (offset > 0) {
 214                     fputc('\n', w);
 215                 }
 216                 start_row(w, offset);
 217             } else {
 218                 fputc(ITEM_SEP, w);
 219             }
 220             write_bin(w, buf[i]);
 221         }
 222     }
 223 }
 224 
 225 // handle_file handles data from the filename given; returns false only when
 226 // the file can't be opened
 227 bool handle_file(FILE* w, const char* path, void (*start_row)(FILE*, size_t)) {
 228     FILE* f = fopen(path, "rb");
 229     if (f == NULL) {
 230         fputc('\n', w);
 231         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 232         return false;
 233     }
 234 
 235     handle_reader(w, f, start_row);
 236 
 237     fclose(f);
 238     return true;
 239 }
 240 
 241 // is_help_option simplifies control-flow for func run
 242 bool is_help_option(const char* s) {
 243     return (s[0] == '-') && (
 244         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 245         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 246     );
 247 }
 248 
 249 // run returns the number of errors
 250 int run(int argc, char** argv, FILE* w) {
 251     size_t dashes = 0;
 252     for (size_t i = 1; i < argc && !feof(w); i++) {
 253         // a `-` filename stands for the standard input
 254         if (strcmp(argv[i], "-") == 0) {
 255             dashes++;
 256         }
 257     }
 258 
 259     if (dashes > 1) {
 260         const char* m = "can't use the standard input (dash) more than once";
 261         fprintf(stderr, ERROR_LINE("%s"), m);
 262         return 1;
 263     }
 264 
 265     size_t files = 0;
 266     size_t errors = 0;
 267     const void (*start_row)() = decimal_offset;
 268     bool options = true;
 269 
 270     // handle all filenames/options given
 271     for (size_t i = 1; i < argc && !feof(w); i++) {
 272         // a `-` filename stands for the standard input
 273         if (strcmp(argv[i], "-") == 0) {
 274             handle_reader(w, stdin, start_row);
 275             continue;
 276         }
 277 
 278         if (options) {
 279             if (
 280                 strcmp(argv[i], "-no-offset") == 0 ||
 281                 strcmp(argv[i], "-no-offsets") == 0 ||
 282                 strcmp(argv[i], "--no-offset") == 0 ||
 283                 strcmp(argv[i], "--no-offsets") == 0
 284             ) {
 285                 start_row = no_offset;
 286                 continue;
 287             }
 288         }
 289 
 290         if (strcmp(argv[i], "--") == 0) {
 291             options = false;
 292             continue;
 293         }
 294 
 295         if (files > 0) {
 296             // put an empty line between adjacent outputs
 297             fputc('\n', w);
 298         }
 299 
 300         if (!handle_file(w, argv[i], start_row)) {
 301             errors++;
 302         }
 303         files++;
 304     }
 305 
 306     // no filenames means use stdin as the only input
 307     if (files == 0 && !feof(w)) {
 308         handle_reader(w, stdin, start_row);
 309     }
 310 
 311     return errors;
 312 }
 313 
 314 int main(int argc, char** argv) {
 315 #ifdef _WIN32
 316     setmode(fileno(stdin), O_BINARY);
 317     // ensure output lines end in LF instead of CRLF on windows
 318     setmode(fileno(stdout), O_BINARY);
 319     setmode(fileno(stderr), O_BINARY);
 320 #endif
 321 
 322     if (argc > 1 && is_help_option(argv[1])) {
 323         fprintf(stderr, "%s", info);
 324         return 0;
 325     }
 326 
 327     return run(argc, argv, stdout) == 0 ? 0 : 1;
 328 }