File: bitdump.c
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 You can build this command-line app by running
  27 
  28 cc -Wall -s -O3 -flto -o ./bitdump ./bitdump.c
  29 
  30 Building with TSV defined makes `bitdump` emit lines of tab-separated values,
  31 instead of space-separated values. You can do that by running
  32 
  33 cc -Wall -s -O3 -flto -D TSV -o ./bitdump ./bitdump.c
  34 */
  35 
  36 #include <stdbool.h>
  37 #include <stdio.h>
  38 #include <string.h>
  39 
  40 #ifdef _WIN32
  41 #include <fcntl.h>
  42 #include <windows.h>
  43 #endif
  44 
  45 #ifdef RED_ERRORS
  46 #define ERROR_STYLE "\x1b[38;2;204;0;0m"
  47 #ifdef __APPLE__
  48 #define ERROR_STYLE "\x1b[31m"
  49 #endif
  50 #define RESET_STYLE "\x1b[0m"
  51 #else
  52 #define ERROR_STYLE
  53 #define RESET_STYLE
  54 #endif
  55 
  56 #define ERROR_LINE(MSG) (ERROR_STYLE MSG RESET_STYLE "\n")
  57 
  58 // ITEM_SEP is the item separator inserted between all values in output lines
  59 #ifdef TSV
  60 #define ITEM_SEP '\t'
  61 #else
  62 #define ITEM_SEP ' '
  63 #endif
  64 
  65 // EMIT_CONST emits string constants without their final null byte
  66 #define EMIT_CONST(w, x) fwrite(x, 1, sizeof(x) - 1, w)
  67 
  68 const char* info = ""
  69 "bitdump [options...] [filenames...]\n"
  70 "\n"
  71 "Show all bits for all input bytes, starting each output line with the\n"
  72 "leading byte's offset.\n"
  73 "\n"
  74 "\n"
  75 "Options\n"
  76 "\n"
  77 "    -h, --h            show this help message\n"
  78 "    -help, --help      aliases for option -h\n"
  79 "";
  80 
/*
lookup holds the 8 ASCII binary digits for each of the 256 possible byte
values, back to back and without separators: the digits for byte value b
start at index 8 * b. Each row below covers 8 consecutive byte values.

The string constant fills the array exactly, leaving no room for a trailing
null byte, so lookup isn't a C string and must always be read by length.

The rows presumably come from a shell pipeline like the following one, which
relies on tools not defined in this file:

tlp = '(f"{bin(v)[2:]:>08}" for v in range(256))' | lineup 8 | gsub '\t' |
    tlp 'f"    \"{l}\""'
*/
const unsigned char lookup[256 * 8] = ""
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111"
    "";
 119 
 120 // write_bin is faster than calling fprintf(w, "%08b", b): this matters
 121 // because it's called for every input byte
 122 static inline void write_bin(FILE* w, unsigned char b) {
 123     const void* ptr = &lookup[8 * b];
 124     fwrite(ptr, 8, 1, w);
 125 }
 126 
 127 void write_decimal_uint(FILE* w, size_t n) {
 128     if (n < 1) {
 129         EMIT_CONST(w, "00000000");
 130         return;
 131     }
 132 
 133     size_t digits;
 134     // 20 is the most digits unsigned 64-bit ints can ever need
 135     unsigned char buf[24];
 136     for (digits = 0; n > 0; digits++, n /= 10) {
 137         buf[sizeof(buf) - 1 - digits] = (n % 10) + '0';
 138     }
 139 
 140     // left-pad the coming digits up to 8 chars
 141     if (digits < 8) {
 142         fwrite((unsigned char*)"00000000", 1, 8 - digits, w);
 143     }
 144 
 145     // emit all digits
 146     const unsigned char* start = buf + sizeof(buf) - digits;
 147     fwrite(start, 1, digits, w);
 148 }
 149 
 150 void write_hex_uint(FILE* w, size_t n) {
 151     if (n < 1) {
 152         EMIT_CONST(w, "00000000");
 153         return;
 154     }
 155 
 156     size_t digits;
 157     // 20 is the most digits unsigned 64-bit ints can ever need
 158     unsigned char buf[24];
 159     for (digits = 0; n > 0; digits += 2, n /= 256) {
 160         unsigned char b = n % 256;
 161         const char* hex_digits = "0123456789abcdef";
 162         buf[sizeof(buf) - 1 - digits - 1] = hex_digits[b >> 4];
 163         buf[sizeof(buf) - 1 - digits - 0] = hex_digits[b & 0x0f];
 164     }
 165 
 166     // left-pad the coming digits up to 8 chars
 167     if (digits < 8) {
 168         fwrite((unsigned char*)"00000000", 1, 8 - digits, w);
 169     }
 170 
 171     // emit all digits
 172     const unsigned char* start = buf + sizeof(buf) - digits;
 173     fwrite(start, 1, digits, w);
 174 }
 175 
 176 void decimal_offset(FILE* w, size_t offset) {
 177     write_decimal_uint(w, offset);
 178     fputc(ITEM_SEP, w);
 179 }
 180 
 181 void hexadecimal_offset(FILE* w, size_t offset) {
 182     write_hex_uint(w, offset);
 183     fputc(ITEM_SEP, w);
 184 }
 185 
 186 void no_offset(FILE*, size_t) {
 187     // do nothing on purpose
 188 }
 189 
 190 // handle_reader shows all bytes read from the source given as colored hex
 191 // values, showing offsets and ASCII symbols on the sides of each output line
 192 void handle_reader(FILE* w, FILE* src, void (*start_row)(FILE*, size_t)) {
 193     const size_t bufcap = 32 * 1024;
 194     unsigned char buf[bufcap];
 195     size_t offset = 0;
 196 
 197     while (!feof(w)) {
 198         const size_t len = fread(&buf, sizeof(buf[0]), sizeof(buf), src);
 199         if (len < 1) {
 200             // assume input is over when no bytes were read
 201             if (offset > 0) {
 202                 fputc('\n', w);
 203             }
 204             break;
 205         }
 206 
 207         for (size_t i = 0; i < len; i++, offset++) {
 208             const size_t rem = offset % 8;
 209             if (rem == 0) {
 210                 if (offset > 0) {
 211                     fputc('\n', w);
 212                 }
 213                 start_row(w, offset);
 214             } else {
 215                 fputc(ITEM_SEP, w);
 216             }
 217             write_bin(w, buf[i]);
 218         }
 219     }
 220 }
 221 
 222 // handle_file handles data from the filename given; returns false only when
 223 // the file can't be opened
 224 bool handle_file(FILE* w, const char* path, void (*start_row)(FILE*, size_t)) {
 225     FILE* f = fopen(path, "rb");
 226     if (f == NULL) {
 227         fputc('\n', w);
 228         fprintf(stderr, ERROR_LINE("can't open file named '%s'"), path);
 229         return false;
 230     }
 231 
 232     handle_reader(w, f, start_row);
 233 
 234     fclose(f);
 235     return true;
 236 }
 237 
 238 // is_help_option simplifies control-flow for func run
 239 bool is_help_option(const char* s) {
 240     return (s[0] == '-') && (
 241         strcmp(s, "-h") == 0 || strcmp(s, "-help") == 0 ||
 242         strcmp(s, "--h") == 0 || strcmp(s, "--help") == 0
 243     );
 244 }
 245 
 246 // run returns the number of errors
 247 int run(int argc, char** argv, FILE* w) {
 248     size_t files = 0;
 249     size_t errors = 0;
 250     const void (*start_row)() = decimal_offset;
 251 
 252     // handle all filenames/options given
 253     for (size_t i = 1; i < argc && !feof(w); i++) {
 254         // a `-` filename stands for the standard input
 255         if (argv[i][0] == '-' && argv[i][1] == 0) {
 256             handle_reader(w, stdin, start_row);
 257             continue;
 258         }
 259 
 260         if (files > 0) {
 261             // put an empty line between adjacent outputs
 262             fputc('\n', w);
 263         }
 264 
 265         if (!handle_file(w, argv[i], start_row)) {
 266             errors++;
 267         }
 268         files++;
 269     }
 270 
 271     // no filenames means use stdin as the only input
 272     if (files == 0) {
 273         handle_reader(w, stdin, start_row);
 274     }
 275 
 276     return errors;
 277 }
 278 
 279 int main(int argc, char** argv) {
 280 #ifdef _WIN32
 281     setmode(fileno(stdin), O_BINARY);
 282     // ensure output lines end in LF instead of CRLF on windows
 283     setmode(fileno(stdout), O_BINARY);
 284     setmode(fileno(stderr), O_BINARY);
 285 #endif
 286 
 287     if (argc > 1 && is_help_option(argv[1])) {
 288         fprintf(stderr, "%s", info);
 289         return 0;
 290     }
 291 
 292     return run(argc, argv, stdout) == 0 ? 0 : 1;
 293 }