File: bytedump.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath bytedump.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "fmt"
  37     "io"
  38     "math"
  39     "os"
  40     "strconv"
  41     "strings"
  42 )
  43 
// info is the help message shown when the first command-line argument is
// one of the help options; the newline right after the opening backtick
// only keeps the raw-string literal tidy, and is skipped when the message
// is shown
const info = `
bytedump [options...] [filenames...]

Show bytes as hexadecimal and ascii on the side.

Each line shows the starting offset for the bytes shown, 16 of the bytes
themselves in base-16 notation, and any ASCII codes when the byte values
are in the typical ASCII range.

The ASCII codes always include 2 rows, which makes the output more 'grep'
friendly, since strings up to 32 items can't be accidentally missed. The
offsets shown are base-16.
`

// perLine is how many input bytes are shown on each line of output, which
// is also the size of the chunks the input is read in
const perLine = 16
  59 
// hexSymbols is a direct lookup table, indexed by byte value: each entry is
// a 3-symbol string made of the byte's 2 lowercase hex digits, followed by
// either the displayable ASCII symbol matching the byte's own ASCII value,
// or a space when there's none to show; the entries for the space (0x20)
// and for the delete control-code (0x7f) end with a space, like all those
// outside the ASCII range do.
//
// This table was autogenerated by running the command
//
// seq 0 255 | ./hex-symbols.awk
//
// NOTE(review): no code visible in this file reads this table; confirm
// whether it's still needed.
var hexSymbols = [256]string{
    `00 `, `01 `, `02 `, `03 `, `04 `, `05 `, `06 `, `07 `,
    `08 `, `09 `, `0a `, `0b `, `0c `, `0d `, `0e `, `0f `,
    `10 `, `11 `, `12 `, `13 `, `14 `, `15 `, `16 `, `17 `,
    `18 `, `19 `, `1a `, `1b `, `1c `, `1d `, `1e `, `1f `,
    `20 `, `21!`, `22"`, `23#`, `24$`, `25%`, `26&`, `27'`,
    `28(`, `29)`, `2a*`, `2b+`, `2c,`, `2d-`, `2e.`, `2f/`,
    `300`, `311`, `322`, `333`, `344`, `355`, `366`, `377`,
    `388`, `399`, `3a:`, `3b;`, `3c<`, `3d=`, `3e>`, `3f?`,
    `40@`, `41A`, `42B`, `43C`, `44D`, `45E`, `46F`, `47G`,
    `48H`, `49I`, `4aJ`, `4bK`, `4cL`, `4dM`, `4eN`, `4fO`,
    `50P`, `51Q`, `52R`, `53S`, `54T`, `55U`, `56V`, `57W`,
    `58X`, `59Y`, `5aZ`, `5b[`, `5c\`, `5d]`, `5e^`, `5f_`,
    "60`", `61a`, `62b`, `63c`, `64d`, `65e`, `66f`, `67g`,
    `68h`, `69i`, `6aj`, `6bk`, `6cl`, `6dm`, `6en`, `6fo`,
    `70p`, `71q`, `72r`, `73s`, `74t`, `75u`, `76v`, `77w`,
    `78x`, `79y`, `7az`, `7b{`, `7c|`, `7d}`, `7e~`, `7f `,
    `80 `, `81 `, `82 `, `83 `, `84 `, `85 `, `86 `, `87 `,
    `88 `, `89 `, `8a `, `8b `, `8c `, `8d `, `8e `, `8f `,
    `90 `, `91 `, `92 `, `93 `, `94 `, `95 `, `96 `, `97 `,
    `98 `, `99 `, `9a `, `9b `, `9c `, `9d `, `9e `, `9f `,
    `a0 `, `a1 `, `a2 `, `a3 `, `a4 `, `a5 `, `a6 `, `a7 `,
    `a8 `, `a9 `, `aa `, `ab `, `ac `, `ad `, `ae `, `af `,
    `b0 `, `b1 `, `b2 `, `b3 `, `b4 `, `b5 `, `b6 `, `b7 `,
    `b8 `, `b9 `, `ba `, `bb `, `bc `, `bd `, `be `, `bf `,
    `c0 `, `c1 `, `c2 `, `c3 `, `c4 `, `c5 `, `c6 `, `c7 `,
    `c8 `, `c9 `, `ca `, `cb `, `cc `, `cd `, `ce `, `cf `,
    `d0 `, `d1 `, `d2 `, `d3 `, `d4 `, `d5 `, `d6 `, `d7 `,
    `d8 `, `d9 `, `da `, `db `, `dc `, `dd `, `de `, `df `,
    `e0 `, `e1 `, `e2 `, `e3 `, `e4 `, `e5 `, `e6 `, `e7 `,
    `e8 `, `e9 `, `ea `, `eb `, `ec `, `ed `, `ee `, `ef `,
    `f0 `, `f1 `, `f2 `, `f3 `, `f4 `, `f5 `, `f6 `, `f7 `,
    `f8 `, `f9 `, `fa `, `fb `, `fc `, `fd `, `fe `, `ff `,
}
  99 
 100 func main() {
 101     args := os.Args[1:]
 102 
 103     if len(args) > 0 {
 104         switch args[0] {
 105         case `-h`, `--h`, `-help`, `--help`:
 106             os.Stdout.WriteString(info[1:])
 107             return
 108         }
 109     }
 110 
 111     if len(args) > 0 && args[0] == `--` {
 112         args = args[1:]
 113     }
 114 
 115     if err := run(args); err != nil {
 116         os.Stdout.WriteString(err.Error())
 117         os.Stdout.WriteString("\n")
 118         os.Exit(1)
 119     }
 120 }
 121 
 122 func run(args []string) error {
 123     w := bufio.NewWriterSize(os.Stdout, 32*1024)
 124     defer w.Flush()
 125 
 126     // with no filenames given, handle stdin and quit
 127     if len(args) == 0 {
 128         return handle(w, os.Stdin, `<stdin>`, -1)
 129     }
 130 
 131     for i, fname := range args {
 132         if i > 0 {
 133             w.WriteString("\n")
 134             w.WriteString("\n")
 135         }
 136 
 137         if err := handleFile(w, fname); err != nil {
 138             return err
 139         }
 140     }
 141 
 142     return nil
 143 }
 144 
 145 func handleFile(w *bufio.Writer, fname string) error {
 146     f, err := os.Open(fname)
 147     if err != nil {
 148         return err
 149     }
 150     defer f.Close()
 151 
 152     stat, err := f.Stat()
 153     if err != nil {
 154         return handle(w, f, fname, -1)
 155     }
 156 
 157     fsize := int(stat.Size())
 158     return handle(w, f, fname, fsize)
 159 }
 160 
 161 // handle shows some messages related to the input and the cmd-line options
 162 // used, and then follows them by the hexadecimal byte-view
 163 func handle(w *bufio.Writer, r io.Reader, name string, size int) error {
 164     owidth10 := -1
 165     owidth16 := -1
 166     if size > 0 {
 167         w10 := math.Log10(float64(size))
 168         w10 = math.Max(math.Ceil(w10), 1)
 169         w16 := math.Log2(float64(size)) / 4
 170         w16 = math.Max(math.Ceil(w16), 1)
 171         owidth10 = int(w10)
 172         owidth16 = int(w16)
 173     }
 174 
 175     if owidth10 < 0 {
 176         owidth10 = 8
 177     }
 178     if owidth16 < 0 {
 179         owidth16 = 8
 180     }
 181 
 182     rc := rendererConfig{
 183         out:           w,
 184         offsetWidth10: max(owidth10, 8),
 185         offsetWidth16: max(owidth16, 8),
 186     }
 187 
 188     if size < 0 {
 189         fmt.Fprintf(w, "%s\n", name)
 190     } else {
 191         const fs = "%s  (%s bytes)\n"
 192         fmt.Fprintf(w, fs, name, sprintCommas(size))
 193     }
 194     w.WriteByte('\n')
 195 
 196     // calling func Read directly can sometimes result in chunks shorter
 197     // than the max chunk-size, even when there are plenty of bytes yet
 198     // to read; to avoid that, use a buffered-reader to explicitly fill
 199     // a slice instead
 200     br := bufio.NewReader(r)
 201 
 202     // to show ASCII up to 1 full chunk ahead, 2 chunks are needed
 203     cur := make([]byte, 0, perLine)
 204     ahead := make([]byte, 0, perLine)
 205 
 206     // the ASCII-panel's wide output requires staying 1 step/chunk behind,
 207     // so to speak
 208     cur, err := fillChunk(cur[:0], perLine, br)
 209     if len(cur) == 0 {
 210         if err == io.EOF {
 211             err = nil
 212         }
 213         return err
 214     }
 215 
 216     for {
 217         ahead, err := fillChunk(ahead[:0], perLine, br)
 218         if err != nil && err != io.EOF {
 219             return err
 220         }
 221 
 222         if len(ahead) == 0 {
 223             // done, maybe except for an extra line of output
 224             break
 225         }
 226 
 227         // show the byte-chunk on its own output line
 228         if err := writeChunk(rc, cur, ahead); err != nil {
 229             return io.EOF
 230         }
 231 
 232         rc.chunks++
 233         rc.offset += uint(len(cur))
 234         cur = cur[:copy(cur, ahead)]
 235     }
 236 
 237     // don't forget the last output line
 238     if rc.chunks > 0 && len(cur) > 0 {
 239         return writeChunk(rc, cur, nil)
 240     }
 241     return nil
 242 }
 243 
 244 // fillChunk tries to read the number of bytes given, appending them to the
 245 // byte-slice given; this func returns an EOF error only when no bytes are
 246 // read, which somewhat simplifies error-handling for the func caller
 247 func fillChunk(chunk []byte, n int, br *bufio.Reader) ([]byte, error) {
 248     // read buffered-bytes up to the max chunk-size
 249     for i := 0; i < n; i++ {
 250         b, err := br.ReadByte()
 251         if err == nil {
 252             chunk = append(chunk, b)
 253             continue
 254         }
 255 
 256         if err == io.EOF && i > 0 {
 257             return chunk, nil
 258         }
 259         return chunk, err
 260     }
 261 
 262     // got the full byte-count asked for
 263     return chunk, nil
 264 }
 265 
// rendererConfig groups several arguments given to any of the rendering
// funcs; func writeChunk takes it by value, so any updates to its fields
// must happen before each call
type rendererConfig struct {
    // out is the writer to send all output to
    out *bufio.Writer

    // offset is the byte-offset of the first byte shown on the current output
    // line: if shown at all, it's shown at the start of the line
    offset uint

    // chunks is the 0-based counter for byte-chunks/lines shown so far, which
    // indirectly keeps track of when it's time to show a `breather` line
    //
    // NOTE(review): no code visible in this file emits `breather` lines;
    // confirm whether this description still applies
    chunks uint

    // offsetWidth10 is the max string-width for the base-10 byte-offsets
    // shown at the start of output lines, and determines those values'
    // left-padding
    //
    // NOTE(review): the only visible use of base-10 offsets is commented
    // out in func writeChunk
    offsetWidth10 int

    // offsetWidth16 is the max string-width for the base-16 byte-offsets
    // shown at the start of output lines, and determines those values'
    // left-padding, which is done using zeros
    offsetWidth16 int
}
 289 
 290 // loopThousandsGroups comes from my lib/package `mathplus`: that's why it
 291 // handles negatives, even though this app only uses it with non-negatives.
 292 func loopThousandsGroups(n int, fn func(i, n int)) {
 293     // 0 doesn't have a log10
 294     if n == 0 {
 295         fn(0, 0)
 296         return
 297     }
 298 
 299     sign := +1
 300     if n < 0 {
 301         n = -n
 302         sign = -1
 303     }
 304 
 305     intLog1000 := int(math.Log10(float64(n)) / 3)
 306     remBase := int(math.Pow10(3 * intLog1000))
 307 
 308     for i := 0; remBase > 0; i++ {
 309         group := (1000 * n) / remBase / 1000
 310         fn(i, sign*group)
 311         // if original number was negative, ensure only first
 312         // group gives a negative input to the callback
 313         sign = +1
 314 
 315         n %= remBase
 316         remBase /= 1000
 317     }
 318 }
 319 
 320 // sprintCommas turns the non-negative number given into a readable string,
 321 // where digits are grouped-separated by commas
 322 func sprintCommas(n int) string {
 323     var sb strings.Builder
 324     loopThousandsGroups(n, func(i, n int) {
 325         if i == 0 {
 326             var buf [4]byte
 327             sb.Write(strconv.AppendInt(buf[:0], int64(n), 10))
 328             return
 329         }
 330         sb.WriteByte(',')
 331         writePad0Sub1000Counter(&sb, uint(n))
 332     })
 333     return sb.String()
 334 }
 335 
 336 // writePad0Sub1000Counter is an alternative to fmt.Fprintf(w, `%03d`, n)
 337 func writePad0Sub1000Counter(w io.Writer, n uint) {
 338     // precondition is 0...999
 339     if n > 999 {
 340         w.Write([]byte(`???`))
 341         return
 342     }
 343 
 344     var buf [3]byte
 345     buf[0] = byte(n/100) + '0'
 346     n %= 100
 347     buf[1] = byte(n/10) + '0'
 348     buf[2] = byte(n%10) + '0'
 349     w.Write(buf[:])
 350 }
 351 
 352 // writeHex is faster than calling fmt.Fprintf(w, `%02x`, b): this
 353 // matters because it's called for every byte of input which isn't
 354 // all 0s or all 1s
 355 func writeHex(w *bufio.Writer, b byte) {
 356     const hexDigits = `0123456789abcdef`
 357     w.WriteByte(hexDigits[b>>4])
 358     w.WriteByte(hexDigits[b&0x0f])
 359 }
 360 
// padding is the number of spaces func writeASCII always puts between the
// hex columns and the ASCII side-panel on each output line; unused hex
// columns, and bytes with no ASCII symbol to show, add to that spacing
const padding = 2
 363 
 364 func writeChunk(rc rendererConfig, first, second []byte) error {
 365     w := rc.out
 366 
 367     // start each line with the byte-offset for the 1st item shown on it
 368     // writeDecimalCounter(w, rc.offsetWidth10, rc.offset)
 369     // w.WriteByte(' ')
 370 
 371     // start each line with the byte-offset for the 1st item shown on it
 372     writeHexadecimalCounter(w, rc.offsetWidth16, rc.offset)
 373     w.WriteByte(' ')
 374 
 375     for _, b := range first {
 376         // fmt.Fprintf(w, ` %02x`, b)
 377         //
 378         // the commented part above was a performance bottleneck, since
 379         // the slow/generic fmt.Fprintf was called for each input byte
 380         w.WriteByte(' ')
 381         writeHex(w, b)
 382     }
 383 
 384     writeASCII(w, first, second, perLine)
 385     return w.WriteByte('\n')
 386 }
 387 
 388 // writeDecimalCounter just emits a left-padded number
 389 func writeDecimalCounter(w *bufio.Writer, width int, n uint) {
 390     var buf [24]byte
 391     str := strconv.AppendUint(buf[:0], uint64(n), 10)
 392     writeSpaces(w, width-len(str))
 393     w.Write(str)
 394 }
 395 
 396 // writeHexadecimalCounter just emits a zero-padded base-16 number
 397 func writeHexadecimalCounter(w *bufio.Writer, width int, n uint) {
 398     var buf [24]byte
 399     str := strconv.AppendUint(buf[:0], uint64(n), 16)
 400     // writeSpaces(w, width-len(str))
 401     for i := 0; i < width-len(str); i++ {
 402         w.WriteByte('0')
 403     }
 404     w.Write(str)
 405 }
 406 
 407 // writeSpaces bulk-emits the number of spaces given
 408 func writeSpaces(w *bufio.Writer, n int) {
 409     const spaces = `                                `
 410     for ; n > len(spaces); n -= len(spaces) {
 411         w.WriteString(spaces)
 412     }
 413     if n > 0 {
 414         w.WriteString(spaces[:n])
 415     }
 416 }
 417 
 418 // writeASCII emits the side-panel showing all ASCII runs for each line
 419 func writeASCII(w *bufio.Writer, first, second []byte, perline int) {
 420     spaces := padding + 3*(perline-len(first))
 421 
 422     for _, b := range first {
 423         if 32 < b && b < 127 {
 424             writeSpaces(w, spaces)
 425             w.WriteByte(b)
 426             spaces = 0
 427         } else {
 428             spaces++
 429         }
 430     }
 431 
 432     for _, b := range second {
 433         if 32 < b && b < 127 {
 434             writeSpaces(w, spaces)
 435             w.WriteByte(b)
 436             spaces = 0
 437         } else {
 438             spaces++
 439         }
 440     }
 441 }