// File: chex/data.c

// info is the multi-line help message shown for the -h/--help options; it
// is one string literal built from adjacent pieces, one per output line
const char* info = ""
    "chex [options...] [filenames...]\n"
    "\n"
    "Colored HEXadecimal is a simple hexadecimal (base-16) viewer to inspect\n"
    "bytes from files or standard input.\n"
    "\n"
    "Each line shows the starting offset for the bytes shown, 16 of the bytes\n"
    "themselves in base-16 notation, and any ASCII codes when the byte values\n"
    "are in the typical ASCII range.\n"
    "\n"
    "The base-16 codes are color-coded, with most bytes shown in gray, while\n"
    "all-1 and all-0 bytes are shown in orange and blue respectively.\n"
    "\n"
    "All-0 bytes are the commonest kind in most binary file types and, along\n"
    "with all-1 bytes are also a special case worth noticing when exploring\n"
    "binary data, so it makes sense for them to stand out right away.\n"
    "\n"
    "\n"
    "Options\n"
    "\n"
    " -h, --h show this help message\n"
    " -help, --help aliases for option -h\n"
    "\n"
    " -p, --p plain-text output, without ANSI styles\n"
    " -plain, --plain aliases for option -p\n"
    "";

// styled_hex_results is a super-fast direct byte-to-result lookup table, and
// was autogenerated by running the command
//
// seq 0 255 | ./hex-styles.awk
// const char* styled_hex_results[256] = {
// "\x1b[38;5;111m00 ",
// "\x1b[38;5;246m01 ",
// "\x1b[38;5;246m02 ",
// "\x1b[38;5;246m03 ",
// "\x1b[38;5;246m04 ",
// "\x1b[38;5;246m05 ",
// "\x1b[38;5;246m06 ",
// "\x1b[38;5;246m07 ",
// "\x1b[38;5;246m08 ",
// "\x1b[38;5;246m09 ",
// "\x1b[38;5;246m0a ",
// "\x1b[38;5;246m0b ",
// "\x1b[38;5;246m0c ",
// "\x1b[38;5;246m0d ",
// "\x1b[38;5;246m0e ",
// "\x1b[38;5;246m0f ",
// "\x1b[38;5;246m10 ",
// "\x1b[38;5;246m11 ",
// "\x1b[38;5;246m12 ",
// "\x1b[38;5;246m13 ",
// "\x1b[38;5;246m14 ",
// "\x1b[38;5;246m15 ",
// "\x1b[38;5;246m16 ",
// "\x1b[38;5;246m17 ",
// "\x1b[38;5;246m18 ",
"\x1b[38;5;246m19 ", 60 // "\x1b[38;5;246m1a ", 61 // "\x1b[38;5;246m1b ", 62 // "\x1b[38;5;246m1c ", 63 // "\x1b[38;5;246m1d ", 64 // "\x1b[38;5;246m1e ", 65 // "\x1b[38;5;246m1f ", 66 // "\x1b[38;5;72m20\x1b[38;5;239m ", 67 // "\x1b[38;5;72m21\x1b[38;5;239m!", 68 // "\x1b[38;5;72m22\x1b[38;5;239m\"", 69 // "\x1b[38;5;72m23\x1b[38;5;239m#", 70 // "\x1b[38;5;72m24\x1b[38;5;239m$", 71 // "\x1b[38;5;72m25\x1b[38;5;239m%", 72 // "\x1b[38;5;72m26\x1b[38;5;239m&", 73 // "\x1b[38;5;72m27\x1b[38;5;239m'", 74 // "\x1b[38;5;72m28\x1b[38;5;239m(", 75 // "\x1b[38;5;72m29\x1b[38;5;239m)", 76 // "\x1b[38;5;72m2a\x1b[38;5;239m*", 77 // "\x1b[38;5;72m2b\x1b[38;5;239m+", 78 // "\x1b[38;5;72m2c\x1b[38;5;239m,", 79 // "\x1b[38;5;72m2d\x1b[38;5;239m-", 80 // "\x1b[38;5;72m2e\x1b[38;5;239m.", 81 // "\x1b[38;5;72m2f\x1b[38;5;239m/", 82 // "\x1b[38;5;72m30\x1b[38;5;239m0", 83 // "\x1b[38;5;72m31\x1b[38;5;239m1", 84 // "\x1b[38;5;72m32\x1b[38;5;239m2", 85 // "\x1b[38;5;72m33\x1b[38;5;239m3", 86 // "\x1b[38;5;72m34\x1b[38;5;239m4", 87 // "\x1b[38;5;72m35\x1b[38;5;239m5", 88 // "\x1b[38;5;72m36\x1b[38;5;239m6", 89 // "\x1b[38;5;72m37\x1b[38;5;239m7", 90 // "\x1b[38;5;72m38\x1b[38;5;239m8", 91 // "\x1b[38;5;72m39\x1b[38;5;239m9", 92 // "\x1b[38;5;72m3a\x1b[38;5;239m:", 93 // "\x1b[38;5;72m3b\x1b[38;5;239m;", 94 // "\x1b[38;5;72m3c\x1b[38;5;239m<", 95 // "\x1b[38;5;72m3d\x1b[38;5;239m=", 96 // "\x1b[38;5;72m3e\x1b[38;5;239m>", 97 // "\x1b[38;5;72m3f\x1b[38;5;239m?", 98 // "\x1b[38;5;72m40\x1b[38;5;239m@", 99 // "\x1b[38;5;72m41\x1b[38;5;239mA", 100 // "\x1b[38;5;72m42\x1b[38;5;239mB", 101 // "\x1b[38;5;72m43\x1b[38;5;239mC", 102 // "\x1b[38;5;72m44\x1b[38;5;239mD", 103 // "\x1b[38;5;72m45\x1b[38;5;239mE", 104 // "\x1b[38;5;72m46\x1b[38;5;239mF", 105 // "\x1b[38;5;72m47\x1b[38;5;239mG", 106 // "\x1b[38;5;72m48\x1b[38;5;239mH", 107 // "\x1b[38;5;72m49\x1b[38;5;239mI", 108 // "\x1b[38;5;72m4a\x1b[38;5;239mJ", 109 // "\x1b[38;5;72m4b\x1b[38;5;239mK", 110 // "\x1b[38;5;72m4c\x1b[38;5;239mL", 111 
// "\x1b[38;5;72m4d\x1b[38;5;239mM", 112 // "\x1b[38;5;72m4e\x1b[38;5;239mN", 113 // "\x1b[38;5;72m4f\x1b[38;5;239mO", 114 // "\x1b[38;5;72m50\x1b[38;5;239mP", 115 // "\x1b[38;5;72m51\x1b[38;5;239mQ", 116 // "\x1b[38;5;72m52\x1b[38;5;239mR", 117 // "\x1b[38;5;72m53\x1b[38;5;239mS", 118 // "\x1b[38;5;72m54\x1b[38;5;239mT", 119 // "\x1b[38;5;72m55\x1b[38;5;239mU", 120 // "\x1b[38;5;72m56\x1b[38;5;239mV", 121 // "\x1b[38;5;72m57\x1b[38;5;239mW", 122 // "\x1b[38;5;72m58\x1b[38;5;239mX", 123 // "\x1b[38;5;72m59\x1b[38;5;239mY", 124 // "\x1b[38;5;72m5a\x1b[38;5;239mZ", 125 // "\x1b[38;5;72m5b\x1b[38;5;239m[", 126 // "\x1b[38;5;72m5c\x1b[38;5;239m\\", 127 // "\x1b[38;5;72m5d\x1b[38;5;239m]", 128 // "\x1b[38;5;72m5e\x1b[38;5;239m^", 129 // "\x1b[38;5;72m5f\x1b[38;5;239m_", 130 // "\x1b[38;5;72m60\x1b[38;5;239m`", 131 // "\x1b[38;5;72m61\x1b[38;5;239ma", 132 // "\x1b[38;5;72m62\x1b[38;5;239mb", 133 // "\x1b[38;5;72m63\x1b[38;5;239mc", 134 // "\x1b[38;5;72m64\x1b[38;5;239md", 135 // "\x1b[38;5;72m65\x1b[38;5;239me", 136 // "\x1b[38;5;72m66\x1b[38;5;239mf", 137 // "\x1b[38;5;72m67\x1b[38;5;239mg", 138 // "\x1b[38;5;72m68\x1b[38;5;239mh", 139 // "\x1b[38;5;72m69\x1b[38;5;239mi", 140 // "\x1b[38;5;72m6a\x1b[38;5;239mj", 141 // "\x1b[38;5;72m6b\x1b[38;5;239mk", 142 // "\x1b[38;5;72m6c\x1b[38;5;239ml", 143 // "\x1b[38;5;72m6d\x1b[38;5;239mm", 144 // "\x1b[38;5;72m6e\x1b[38;5;239mn", 145 // "\x1b[38;5;72m6f\x1b[38;5;239mo", 146 // "\x1b[38;5;72m70\x1b[38;5;239mp", 147 // "\x1b[38;5;72m71\x1b[38;5;239mq", 148 // "\x1b[38;5;72m72\x1b[38;5;239mr", 149 // "\x1b[38;5;72m73\x1b[38;5;239ms", 150 // "\x1b[38;5;72m74\x1b[38;5;239mt", 151 // "\x1b[38;5;72m75\x1b[38;5;239mu", 152 // "\x1b[38;5;72m76\x1b[38;5;239mv", 153 // "\x1b[38;5;72m77\x1b[38;5;239mw", 154 // "\x1b[38;5;72m78\x1b[38;5;239mx", 155 // "\x1b[38;5;72m79\x1b[38;5;239my", 156 // "\x1b[38;5;72m7a\x1b[38;5;239mz", 157 // "\x1b[38;5;72m7b\x1b[38;5;239m{", 158 // "\x1b[38;5;72m7c\x1b[38;5;239m|", 159 // 
"\x1b[38;5;72m7d\x1b[38;5;239m}", 160 // "\x1b[38;5;72m7e\x1b[38;5;239m~", 161 // "\x1b[38;5;246m7f ", 162 // "\x1b[38;5;246m80 ", 163 // "\x1b[38;5;246m81 ", 164 // "\x1b[38;5;246m82 ", 165 // "\x1b[38;5;246m83 ", 166 // "\x1b[38;5;246m84 ", 167 // "\x1b[38;5;246m85 ", 168 // "\x1b[38;5;246m86 ", 169 // "\x1b[38;5;246m87 ", 170 // "\x1b[38;5;246m88 ", 171 // "\x1b[38;5;246m89 ", 172 // "\x1b[38;5;246m8a ", 173 // "\x1b[38;5;246m8b ", 174 // "\x1b[38;5;246m8c ", 175 // "\x1b[38;5;246m8d ", 176 // "\x1b[38;5;246m8e ", 177 // "\x1b[38;5;246m8f ", 178 // "\x1b[38;5;246m90 ", 179 // "\x1b[38;5;246m91 ", 180 // "\x1b[38;5;246m92 ", 181 // "\x1b[38;5;246m93 ", 182 // "\x1b[38;5;246m94 ", 183 // "\x1b[38;5;246m95 ", 184 // "\x1b[38;5;246m96 ", 185 // "\x1b[38;5;246m97 ", 186 // "\x1b[38;5;246m98 ", 187 // "\x1b[38;5;246m99 ", 188 // "\x1b[38;5;246m9a ", 189 // "\x1b[38;5;246m9b ", 190 // "\x1b[38;5;246m9c ", 191 // "\x1b[38;5;246m9d ", 192 // "\x1b[38;5;246m9e ", 193 // "\x1b[38;5;246m9f ", 194 // "\x1b[38;5;246ma0 ", 195 // "\x1b[38;5;246ma1 ", 196 // "\x1b[38;5;246ma2 ", 197 // "\x1b[38;5;246ma3 ", 198 // "\x1b[38;5;246ma4 ", 199 // "\x1b[38;5;246ma5 ", 200 // "\x1b[38;5;246ma6 ", 201 // "\x1b[38;5;246ma7 ", 202 // "\x1b[38;5;246ma8 ", 203 // "\x1b[38;5;246ma9 ", 204 // "\x1b[38;5;246maa ", 205 // "\x1b[38;5;246mab ", 206 // "\x1b[38;5;246mac ", 207 // "\x1b[38;5;246mad ", 208 // "\x1b[38;5;246mae ", 209 // "\x1b[38;5;246maf ", 210 // "\x1b[38;5;246mb0 ", 211 // "\x1b[38;5;246mb1 ", 212 // "\x1b[38;5;246mb2 ", 213 // "\x1b[38;5;246mb3 ", 214 // "\x1b[38;5;246mb4 ", 215 // "\x1b[38;5;246mb5 ", 216 // "\x1b[38;5;246mb6 ", 217 // "\x1b[38;5;246mb7 ", 218 // "\x1b[38;5;246mb8 ", 219 // "\x1b[38;5;246mb9 ", 220 // "\x1b[38;5;246mba ", 221 // "\x1b[38;5;246mbb ", 222 // "\x1b[38;5;246mbc ", 223 // "\x1b[38;5;246mbd ", 224 // "\x1b[38;5;246mbe ", 225 // "\x1b[38;5;246mbf ", 226 // "\x1b[38;5;246mc0 ", 227 // "\x1b[38;5;246mc1 ", 228 // "\x1b[38;5;246mc2 ", 229 // 
"\x1b[38;5;246mc3 ", 230 // "\x1b[38;5;246mc4 ", 231 // "\x1b[38;5;246mc5 ", 232 // "\x1b[38;5;246mc6 ", 233 // "\x1b[38;5;246mc7 ", 234 // "\x1b[38;5;246mc8 ", 235 // "\x1b[38;5;246mc9 ", 236 // "\x1b[38;5;246mca ", 237 // "\x1b[38;5;246mcb ", 238 // "\x1b[38;5;246mcc ", 239 // "\x1b[38;5;246mcd ", 240 // "\x1b[38;5;246mce ", 241 // "\x1b[38;5;246mcf ", 242 // "\x1b[38;5;246md0 ", 243 // "\x1b[38;5;246md1 ", 244 // "\x1b[38;5;246md2 ", 245 // "\x1b[38;5;246md3 ", 246 // "\x1b[38;5;246md4 ", 247 // "\x1b[38;5;246md5 ", 248 // "\x1b[38;5;246md6 ", 249 // "\x1b[38;5;246md7 ", 250 // "\x1b[38;5;246md8 ", 251 // "\x1b[38;5;246md9 ", 252 // "\x1b[38;5;246mda ", 253 // "\x1b[38;5;246mdb ", 254 // "\x1b[38;5;246mdc ", 255 // "\x1b[38;5;246mdd ", 256 // "\x1b[38;5;246mde ", 257 // "\x1b[38;5;246mdf ", 258 // "\x1b[38;5;246me0 ", 259 // "\x1b[38;5;246me1 ", 260 // "\x1b[38;5;246me2 ", 261 // "\x1b[38;5;246me3 ", 262 // "\x1b[38;5;246me4 ", 263 // "\x1b[38;5;246me5 ", 264 // "\x1b[38;5;246me6 ", 265 // "\x1b[38;5;246me7 ", 266 // "\x1b[38;5;246me8 ", 267 // "\x1b[38;5;246me9 ", 268 // "\x1b[38;5;246mea ", 269 // "\x1b[38;5;246meb ", 270 // "\x1b[38;5;246mec ", 271 // "\x1b[38;5;246med ", 272 // "\x1b[38;5;246mee ", 273 // "\x1b[38;5;246mef ", 274 // "\x1b[38;5;246mf0 ", 275 // "\x1b[38;5;246mf1 ", 276 // "\x1b[38;5;246mf2 ", 277 // "\x1b[38;5;246mf3 ", 278 // "\x1b[38;5;246mf4 ", 279 // "\x1b[38;5;246mf5 ", 280 // "\x1b[38;5;246mf6 ", 281 // "\x1b[38;5;246mf7 ", 282 // "\x1b[38;5;246mf8 ", 283 // "\x1b[38;5;246mf9 ", 284 // "\x1b[38;5;246mfa ", 285 // "\x1b[38;5;246mfb ", 286 // "\x1b[38;5;246mfc ", 287 // "\x1b[38;5;246mfd ", 288 // "\x1b[38;5;246mfe ", 289 // "\x1b[38;5;209mff ", 290 // }; File: chex/hex.rc 1 // https://docs.microsoft.com/en-us/windows/win32/menurc/versioninfo-resource 2 // windres -O coff -o hex.res hex.rc 3 4 IDI_ICON1 ICON "logo.ico" File: chex/hex-styles.awk 1 #!/usr/bin/awk -f 2 3 # all 0 bits 4 $0 == 0 { 5 print "\"\\x1b[38;5;111m00 \"," 6 next 7 } 8 
9 # ascii symbol which need backslashing 10 $0 == 34 || $0 == 92 { 11 printf "\"\\x1b[38;5;72m%02x\\x1b[38;5;239m\\%c\",\n", $0 + 0, $0 12 next 13 } 14 15 # all other ascii symbol 16 32 <= $0 && $0 <= 126 { 17 printf "\"\\x1b[38;5;72m%02x\\x1b[38;5;239m%c\",\n", $0 + 0, $0 18 next 19 } 20 21 # all 1 bits 22 $0 == 255 { 23 print "\"\\x1b[38;5;209mff \"," 24 next 25 } 26 27 # all other bytes 28 1 { 29 printf "\"\\x1b[38;5;246m%02x \",\n", $0 + 0 30 next 31 } File: chex/info.txt 1 chex [options...] [filenames...] 2 3 Colored HEXadecimal is a simple hexadecimal (base-16) viewer to inspect 4 bytes from files or standard input. 5 6 Each line shows the starting offset for the bytes shown, 20 of the bytes 7 themselves in base-16 notation, and any ASCII codes when the byte values 8 are in the typical ASCII range. 9 10 The base-16 codes are color-coded, with most bytes shown in gray, while 11 all-1 and all-0 bytes are shown in orange and blue respectively. 12 13 All-0 bytes are the commonest kind in most binary file types and, along 14 with all-1 bytes are also a special case worth noticing when exploring 15 binary data, so it makes sense for them to stand out right away. 
/*
 * Remainder of chex/info.txt, followed by the markers for the two binary
 * assets and for this file, kept verbatim:
 *
 *
 * Options
 *
 *  -h, --h show this help message
 *  -help, --help aliases for option -h
 *
 *  -p, --p plain-text output, without ANSI styles
 *  -plain, --plain aliases for option -p
 *
 * File: chex/logo.ico <BINARY>
 * File: chex/logo.png <BINARY>
 * File: chex/main.c
 */

// fileno and fstat come from POSIX, not ISO C: ask for them explicitly, so
// strict builds (e.g. -std=c11) still see their declarations
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#ifdef _WIN32
// setmode is declared here on windows
#include <io.h>
#endif

// building with CHEX_COMPACT_OUTPUT defined makes `chex` output many fewer
// bytes, at the cost of using arguably worse colors

#ifdef CHEX_COMPACT_OUTPUT
#define OUTPUT_FOR_00 "\x1b[34m00 "
#define OUTPUT_FOR_FF "\x1b[33mff "
#define NORMAL_HEX_STYLE "\x1b[37m"
#define ASCII_HEX_STYLE "\x1b[32m"
#define ASCII_BYTE_STYLE "\x1b[30m"
#else
#define OUTPUT_FOR_00 "\x1b[38;5;111m00 "
#define OUTPUT_FOR_FF "\x1b[38;5;209mff "
#define NORMAL_HEX_STYLE "\x1b[38;5;246m"
#define ASCII_HEX_STYLE "\x1b[38;5;72m"
#define ASCII_BYTE_STYLE "\x1b[38;5;239m"
#endif

// OFFSET_WIDTH is how many columns the offset at the start of each line is
// left-padded to
enum { OFFSET_WIDTH = 8 };

// MISSING_CELL stands in for a byte which isn't there on a short last line:
// every byte shown takes 3 columns, either 2 hex digits and a space, or
// 2 hex digits and the ASCII symbol
#define MISSING_CELL "   "

// info is the help message, defined in data.c
extern const char* info;

// bufwriter is, as the name implies, a buffered-writer: when it's aimed at
// stdout, it considerably speeds up this app, as intended
typedef struct bufwriter {
    // buf is the buffer proper
    unsigned char* buf;

    // len is how many bytes of the buffer are currently being used
    size_t len;

    // cap is the capacity of the buffer, or the most bytes it can hold
    size_t cap;

    // out is the destination of all that's written into the buffer
    FILE* out;

    // done signals when/if no more output is accepted at the destination
    bool done;
} bufwriter;

// new_bufwriter is the constructor for type bufwriter; when the buffer can't
// be allocated (or cap is 0) the result has a NULL/unusable buffer and is
// already marked as done, so all later writes to it are safely ignored
bufwriter new_bufwriter(FILE* dst, size_t cap) {
    bufwriter res;
    res.buf = cap > 0 ? malloc(cap) : NULL;
    res.len = 0;
    res.cap = res.buf != NULL ? cap : 0;
    res.out = dst;
    res.done = res.buf == NULL;
    return res;
}

// flush does as it says: it empties the buffer after ensuring its bytes end
// on their intended destination; a failed write marks the writer as done
void flush(bufwriter* w) {
    if (!w->done && w->len > 0) {
        if (fwrite(w->buf, w->len, 1, w->out) < 1) {
            w->done = true;
        }
    }
    w->len = 0;
}

// close_bufwriter ensures all output is shown and deallocates the buffer
void close_bufwriter(bufwriter* w) {
    flush(w);
    free(w->buf);
    w->buf = NULL;
}

// write_bytes does as it says, minimizing the number of calls to fwrite;
// nothing happens once the writer is done
void write_bytes(bufwriter* w, const unsigned char* src, size_t len) {
    if (w->done || len == 0) {
        return;
    }

    // w->len never exceeds w->cap, so this subtraction can't wrap around
    if (len < w->cap - w->len) {
        // all bytes fit into buffer
        memcpy(w->buf + w->len, src, len);
        w->len += len;
        return;
    }

    // ensure current buffer bytes go out, before crossing strides
    flush(w);

    // emit all chunks striding beyond/at the buffer's capacity
    while (!w->done && len >= w->cap) {
        if (fwrite(src, w->cap, 1, w->out) < 1) {
            w->done = true;
        }
        src += w->cap;
        len -= w->cap;
    }
    if (w->done) {
        return;
    }

    // now all, if any, remaining bytes will fit into the (empty) buffer
    memcpy(w->buf, src, len);
    w->len = len;
}

// write_byte does as it says; nothing happens once the writer is done
void write_byte(bufwriter* w, unsigned char b) {
    if (w->done) {
        return;
    }
    if (w->len >= w->cap) {
        flush(w);
    }
    w->buf[w->len] = b;
    w->len++;
}

// EMIT_CONST abstracts a common use-case of the bufwriter, which is
// emitting string constants without their final null byte; x must be a
// string literal (or a macro expanding to one), since sizeof is used
#define EMIT_CONST(w, x) \
    write_bytes((w), (const unsigned char*)(x), sizeof(x) - 1)

// write_padding emits the number of spaces given
static void write_padding(bufwriter* w, size_t n) {
    for (; n > 0; n--) {
        write_byte(w, ' ');
    }
}

// decimal_digits right-aligns the base-10 digits of n at the end of buf,
// which holds cap bytes, and returns how many digits that took: the digits
// start at buf + cap - result; 0 results in the single digit `0`
static size_t decimal_digits(size_t n, unsigned char* buf, size_t cap) {
    size_t digits = 0;
    do {
        buf[cap - 1 - digits] = (unsigned char)('0' + n % 10);
        digits++;
        n /= 10;
    } while (n > 0);
    return digits;
}

// write_hex is faster than calling fprintf(w, "%02x", b): this matters
// because it's called for every input byte
void write_hex(bufwriter* w, unsigned char b) {
    static const char hex_digits[] = "0123456789abcdef";
    write_byte(w, (unsigned char)hex_digits[b >> 4]);
    write_byte(w, (unsigned char)hex_digits[b & 0x0f]);
}

// write_styled_hex emits an ANSI color-coded hexadecimal representation
// of the byte given; every byte takes 3 columns on screen
void write_styled_hex(bufwriter* w, unsigned char b) {
    // all-bits-off is almost always noteworthy
    if (b == 0) {
        EMIT_CONST(w, OUTPUT_FOR_00);
        return;
    }

    // all-bits-on is often noteworthy
    if (b == 0xff) {
        EMIT_CONST(w, OUTPUT_FOR_FF);
        return;
    }

    // regular ASCII display symbols: the symbol follows its hex code
    if (32 <= b && b <= 126) {
        EMIT_CONST(w, ASCII_HEX_STYLE);
        write_hex(w, b);
        EMIT_CONST(w, ASCII_BYTE_STYLE);
        write_byte(w, b);
        return;
    }

    // ASCII control values, and other bytes beyond displayable ASCII
    EMIT_CONST(w, NORMAL_HEX_STYLE);
    write_hex(w, b);
    write_byte(w, ' ');
}

// ruler emits a ruler-like string of spaced-out symbols, one symbol for
// every 4 bytes a line can show; lines narrower than that get no ruler
void ruler(bufwriter* w, size_t bytes_per_line) {
    const size_t gap = 4;
    if (bytes_per_line < gap) {
        return;
    }

    // NOTE(review): the spacing inside these 2 literals decides where the
    // dots land; confirm it against the intended column layout
    EMIT_CONST(w, " ·");
    for (size_t n = bytes_per_line - gap; n >= gap; n -= gap) {
        EMIT_CONST(w, " ·");
    }
}

// write_commas_uint shows a number by separating 3-digits groups with commas
void write_commas_uint(bufwriter* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    size_t digits = decimal_digits(n, buf, sizeof(buf));
    const unsigned char* start = buf + sizeof(buf) - digits;

    // emit the leading digits, which may not come in 3
    size_t leading = digits % 3;
    if (leading == 0) {
        // avoid having a comma before the first digit
        leading = 3;
    }
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in groups of 3, each after a comma
    for (; digits > 0; start += 3, digits -= 3) {
        write_byte(w, ',');
        write_bytes(w, start, 3);
    }
}

// output_state ties all values representing the current state shared across
// all functions involved in interpreting the input-buffer and showing its
// bytes and ASCII values
typedef struct output_state {
    // the whole input-buffer and its currently-used length in bytes
    unsigned char* buf;
    size_t buflen;

    // the ASCII-text buffer and its currently-used length in bytes
    unsigned char* txt;
    size_t txtlen;

    // offset is the byte counter, shown at the start of each line
    size_t offset;

    // linewidth is how many bytes each line can show at most
    size_t linewidth;

    // lines is the line counter, which is used to provide periodic
    // breather lines, to make eye-scanning big output blobs easier
    size_t lines;

    // showtxt is a hint on whether it's sensible to show the ASCII-text
    // buffer for the current line
    bool showtxt;
} output_state;

// peek_ascii looks 2 lines ahead in the buffer to get all ASCII-like runs
// of bytes, which are later meant to show on the side panel; it scans bytes
// i (included) to end (excluded) of os->buf, fills os->txt with the symbols
// found, using single spaces between runs, and updates os->showtxt
void peek_ascii(size_t i, size_t end, output_state* os) {
    unsigned char prev = 0;
    os->txtlen = 0;
    // with no symbols in sight there's no panel to show: resetting this on
    // each call also avoids reusing the verdict from a previous line
    os->showtxt = false;

    for (size_t j = i; j < end; j++) {
        const unsigned char b = os->buf[j];

        if (' ' < b && b <= '~') {
            const bool first = os->txtlen == 0;
            if (first) {
                // show ASCII panel, if the symbols start on the current line
                os->showtxt = j - i < os->linewidth;
            }

            // add a space before the symbol, when it's the start of a `word`
            if ((prev <= ' ' || prev > '~') && !first) {
                os->txt[os->txtlen] = ' ';
                os->txtlen++;
            }

            // add the symbol itself
            os->txt[os->txtlen] = b;
            os->txtlen++;
        }

        prev = b;
    }
}

// write_plain_uint is the unstyled counterpart of func write_styled_uint:
// it emits the number given, left-padded with spaces up to 8 columns
void write_plain_uint(bufwriter* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    const size_t digits = decimal_digits(n, buf, sizeof(buf));

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_padding(w, OFFSET_WIDTH - digits);
    }

    // emit all digits
    write_bytes(w, buf + sizeof(buf) - digits, digits);
}

// write_styled_uint is a quick way to emit the offset-counter showing at the
// start of each line; it assumes 8-item left-padding of values, unless the
// numbers are too big for that
void write_styled_uint(bufwriter* w, size_t n) {
    // 20 is the most digits unsigned 64-bit ints can ever need
    unsigned char buf[24];
    size_t digits = decimal_digits(n, buf, sizeof(buf));

    // left-pad the coming digits up to 8 chars
    if (digits < OFFSET_WIDTH) {
        write_padding(w, OFFSET_WIDTH - digits);
    }

    // now emit the leading digits, which may be fewer than 3
    const unsigned char* start = buf + sizeof(buf) - digits;
    const size_t leading = digits % 3;
    write_bytes(w, start, leading);
    start += leading;
    digits -= leading;

    // now emit all remaining digits in groups of 3, alternating styles
    bool styled = leading != 0;
    for (; digits > 0; start += 3, digits -= 3, styled = !styled) {
        if (styled) {
            EMIT_CONST(w, "\x1b[38;5;243m");
            write_bytes(w, start, 3);
            EMIT_CONST(w, "\x1b[0m");
        } else {
            write_bytes(w, start, 3);
        }
    }
}

// emit_styled_file_info emits an ANSI-styled line showing a filename and the
// file's size in bytes
void emit_styled_file_info(bufwriter* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " \x1b[38;5;245m(");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\x1b[0m\n");
}

// emit_plain_file_info is the unstyled counterpart of func
// emit_styled_file_info
void emit_plain_file_info(bufwriter* w, const char* path, size_t nbytes) {
    EMIT_CONST(w, "• ");
    write_bytes(w, (const unsigned char*)path, strlen(path));
    EMIT_CONST(w, " (");
    write_commas_uint(w, nbytes);
    EMIT_CONST(w, " bytes)\n");
}

// emit_styled_line handles the details of showing a styled line out of the
// current input-buffer chunk: bytes i (included) to end (excluded) of
// os->buf are shown, followed by the ASCII panel when os->showtxt is set
void emit_styled_line(bufwriter* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        if (j % os->linewidth == 0) {
            // show a ruler every few lines to make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                EMIT_CONST(w, " \x1b[38;5;245m");
                ruler(w, os->linewidth);
                EMIT_CONST(w, "\x1b[0m\n");
            }
            os->lines++;

            // start next line with offset of its 1st item, also
            // changing the background color for the colored hex
            // code which will follow
            write_styled_uint(w, os->offset);
            EMIT_CONST(w, " \x1b[48;5;254m");
        }

        // show the current byte `with style`
        write_styled_hex(w, os->buf[j]);
    }

    if (!os->showtxt) {
        EMIT_CONST(w, "\x1b[0m\n");
        return;
    }

    // keep the ASCII panel aligned, even when the line is a short one
    EMIT_CONST(w, "\x1b[0m ");
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, MISSING_CELL);
    }
    write_bytes(w, os->txt, os->txtlen);
    write_byte(w, '\n');
}

// emit_plain_line handles the details of showing a plain (unstyled) line out
// of the current input-buffer chunk: bytes i (included) to end (excluded) of
// os->buf are shown, followed by the ASCII panel when os->showtxt is set
void emit_plain_line(bufwriter* w, size_t i, size_t end, output_state* os) {
    for (size_t j = i; j < end; j++, os->offset++) {
        if (j % os->linewidth == 0) {
            // show an empty line every few lines to make eye-scanning easier
            if (os->lines % 5 == 0 && os->lines > 0) {
                write_byte(w, '\n');
            }
            os->lines++;

            // start next line with offset of its 1st item
            write_plain_uint(w, os->offset);
            EMIT_CONST(w, " ");
        }

        // show the current byte, followed by a space
        write_hex(w, os->buf[j]);
        write_byte(w, ' ');
    }

    if (!os->showtxt) {
        write_byte(w, '\n');
        return;
    }

    // keep the ASCII panel aligned, even when the line is a short one
    EMIT_CONST(w, " ");
    for (size_t j = end - i; j < os->linewidth; j++) {
        EMIT_CONST(w, MISSING_CELL);
    }
    write_bytes(w, os->txt, os->txtlen);
    write_byte(w, '\n');
}

// config has all the settings used to emit output
typedef struct config {
    // bytes_per_line determines the `width` of output lines
    size_t bytes_per_line;

    // emit_file_info is chosen to emit file-info with colors or plainly
    void (*emit_file_info)(bufwriter* w, const char* path, size_t nbytes);

    // emit_line is chosen to emit hex bytes with colors or plainly
    void (*emit_line)(bufwriter* w, size_t i, size_t end, output_state* os);
} config;

// handle_reader shows all bytes read from the source given as colored hex
// values, showing offsets and ASCII symbols on the sides of each output line;
// it stops when the input is over, or when the output is no longer accepted
void handle_reader(bufwriter* w, FILE* src, config cfg) {
    enum { bufcap = 32 * 1024 };

    // keep the line-width between 1 and the buffer's capacity: a width of 0
    // would lead to dividing by 0 below
    if (cfg.bytes_per_line < 1) {
        cfg.bytes_per_line = 1;
    }
    if (cfg.bytes_per_line > bufcap) {
        cfg.bytes_per_line = bufcap;
    }

    const size_t one_line = cfg.bytes_per_line;
    const size_t two_lines = 2 * one_line;
    // ensure the effective buffer-size is a multiple of the line-width
    const size_t max = bufcap - bufcap % one_line;

    // a single heap block holds both the input buffer and the ASCII-text
    // buffer which follows it: the latter never needs more than the 2 lines
    // of bytes func peek_ascii looks at
    unsigned char* buf = malloc(bufcap + two_lines);
    if (buf == NULL) {
        // ensure currently-buffered output shows up before the complaint
        flush(w);
        fprintf(stderr, "\x1b[31mout of memory\x1b[0m\n");
        return;
    }

    output_state os = {
        .buf = buf,
        .buflen = 0,
        .txt = buf + bufcap,
        .txtlen = 0,
        .offset = 0,
        .linewidth = one_line,
        .lines = 0,
        .showtxt = false,
    };

    while (!w->done) {
        os.buflen = fread(buf, sizeof(unsigned char), max, src);
        if (os.buflen < 1) {
            // assume input is over when no bytes were read
            break;
        }

        for (size_t i = 0; i < os.buflen; i += one_line) {
            size_t end;

            // remember all ASCII symbols in current pair of output lines
            end = i + two_lines < os.buflen ? i + two_lines : os.buflen;
            peek_ascii(i, end, &os);

            // show current output line
            end = i + one_line < os.buflen ? i + one_line : os.buflen;
            cfg.emit_line(w, i, end, &os);
        }
    }

    free(buf);
}

// handle_file handles data from the filename given; returns false only when
// the file can't be opened
bool handle_file(bufwriter* w, const char* path, config cfg) {
    // a `-` filename stands for the standard input
    if (strcmp(path, "-") == 0) {
        EMIT_CONST(w, "• <stdin>\n");
        EMIT_CONST(w, "\n");
        handle_reader(w, stdin, cfg);
        return true;
    }

    FILE* f = fopen(path, "rb");
    if (f == NULL) {
        // ensure currently-buffered/deferred output shows up right now: not
        // doing so may scramble results in the common case where stdout and
        // stderr are the same, thus confusing users
        flush(w);

        fprintf(stderr, "\x1b[31mcan't open file named %s\x1b[0m\n", path);
        return false;
    }

    // get the file size, showing 0 when it can't be found out
    struct stat st;
    size_t nbytes = 0;
    if (fstat(fileno(f), &st) == 0 && st.st_size > 0) {
        nbytes = (size_t)st.st_size;
    }

    // show output
    cfg.emit_file_info(w, path, nbytes);
    EMIT_CONST(w, "\n");
    handle_reader(w, f, cfg);

    fclose(f);
    return true;
}

// is_help_option simplifies control-flow for func run
bool is_help_option(const char* s) {
    return false ||
        strcmp(s, "-h") == 0 ||
        strcmp(s, "-help") == 0 ||
        strcmp(s, "--h") == 0 ||
        strcmp(s, "--help") == 0;
}

// is_plain_option simplifies control-flow for func run
bool is_plain_option(const char* s) {
    return false ||
        strcmp(s, "-p") == 0 ||
        strcmp(s, "-plain") == 0 ||
        strcmp(s, "--p") == 0 ||
        strcmp(s, "--plain") == 0;
}

// run returns the number of errors
size_t run(int argc, char** argv) {
    config cfg;
    cfg.bytes_per_line = 16;
    cfg.emit_line = &emit_styled_line;
    cfg.emit_file_info = &emit_styled_file_info;

    // handle special cmd-line options and count filenames
    size_t fnames = 0;
    for (int i = 1; i < argc; i++) {
        if (is_help_option(argv[i])) {
            // help option is handled right away, also quitting the app
            fprintf(stderr, "%s", info);
            return 0;
        }
        if (is_plain_option(argv[i])) {
            cfg.emit_line = &emit_plain_line;
            cfg.emit_file_info = &emit_plain_file_info;
            continue;
        }
        fnames++;
    }

    bufwriter w = new_bufwriter(stdout, 32 * 1024);
    if (w.buf == NULL) {
        fprintf(stderr, "\x1b[31mout of memory\x1b[0m\n");
        return 1;
    }

    // no filenames means use stdin as the only input
    if (fnames == 0) {
        EMIT_CONST(&w, "• <stdin>\n");
        EMIT_CONST(&w, "\n");
        handle_reader(&w, stdin, cfg);
        close_bufwriter(&w);
        return 0;
    }

    size_t errors = 0;
    bool first_file = true;

    // handle all filenames given
    for (int i = 1; i < argc && !w.done; i++) {
        if (is_plain_option(argv[i])) {
            // special cmd-line options aren't filenames, wherever they are
            // among the arguments: the loop above didn't count them either
            continue;
        }

        if (!first_file) {
            // put an empty line between adjacent hex outputs
            write_byte(&w, '\n');
        }

        if (!handle_file(&w, argv[i], cfg)) {
            errors++;
        }
        first_file = false;
    }

    close_bufwriter(&w);
    return errors;
}

// main sets up the standard streams, and turns the error count from func
// run into the app's exit code: 0 on success, 1 otherwise
int main(int argc, char** argv) {
#ifdef _WIN32
    setmode(fileno(stdin), O_BINARY);
    // ensure output lines end in LF instead of CRLF on windows
    setmode(fileno(stdout), O_BINARY);
    setmode(fileno(stderr), O_BINARY);
#endif

    // disable automatic stdio buffering, in favor of explicit buffering
    setvbuf(stdin, NULL, _IONBF, 0);
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    return run(argc, argv) == 0 ? 0 : 1;
}