File: bytedump.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath bytedump.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "bytes"
  37     "errors"
  38     "fmt"
  39     "io"
  40     "math"
  41     "os"
  42     "strconv"
  43     "strings"
  44 )
  45 
// info is the help message func main shows for any of the help options; it
// starts with a newline, which func main skips when showing it
const info = `
bytedump [options...] [filenames...]

Show bytes as hexadecimal and ascii on the side.

Each line shows the starting offset for the bytes shown, 16 of the bytes
themselves in base-16 notation, and any ASCII codes when the byte values
are in the typical ASCII range. The offsets shown are base-10.
`
  55 
// perLine is how many input bytes are shown on each output line: it's the
// chunk-size func handle reads with, and it's also given to funcs makeRuler
// and writeASCII
const perLine = 16
  57 
// errNoMoreOutput is a dummy error whose message is ignored, and which
// causes the app to quit immediately and successfully: func handle returns
// it when writing an output line fails, and func main treats it the same
// as no error at all
var errNoMoreOutput = errors.New(`no more output`)
  61 
// hexSymbols is a direct lookup table combining 2 hex digits with either a
// space or a displayable ASCII symbol matching the byte's own ASCII value;
// each entry is 3 bytes long, and is indexed by the byte value it describes:
// only byte values from 0x21 to 0x7e end with a symbol instead of a space;
// this table was autogenerated by running the command
//
// seq 0 255 | ./hex-symbols.awk
//
// NOTE(review): nothing else visible in this file uses this table; confirm
// it's really unused before removing it
var hexSymbols = [256]string{
    `00 `, `01 `, `02 `, `03 `, `04 `, `05 `, `06 `, `07 `,
    `08 `, `09 `, `0a `, `0b `, `0c `, `0d `, `0e `, `0f `,
    `10 `, `11 `, `12 `, `13 `, `14 `, `15 `, `16 `, `17 `,
    `18 `, `19 `, `1a `, `1b `, `1c `, `1d `, `1e `, `1f `,
    `20 `, `21!`, `22"`, `23#`, `24$`, `25%`, `26&`, `27'`,
    `28(`, `29)`, `2a*`, `2b+`, `2c,`, `2d-`, `2e.`, `2f/`,
    `300`, `311`, `322`, `333`, `344`, `355`, `366`, `377`,
    `388`, `399`, `3a:`, `3b;`, `3c<`, `3d=`, `3e>`, `3f?`,
    `40@`, `41A`, `42B`, `43C`, `44D`, `45E`, `46F`, `47G`,
    `48H`, `49I`, `4aJ`, `4bK`, `4cL`, `4dM`, `4eN`, `4fO`,
    `50P`, `51Q`, `52R`, `53S`, `54T`, `55U`, `56V`, `57W`,
    `58X`, `59Y`, `5aZ`, `5b[`, `5c\`, `5d]`, `5e^`, `5f_`,
    "60`", `61a`, `62b`, `63c`, `64d`, `65e`, `66f`, `67g`,
    `68h`, `69i`, `6aj`, `6bk`, `6cl`, `6dm`, `6en`, `6fo`,
    `70p`, `71q`, `72r`, `73s`, `74t`, `75u`, `76v`, `77w`,
    `78x`, `79y`, `7az`, `7b{`, `7c|`, `7d}`, `7e~`, `7f `,
    `80 `, `81 `, `82 `, `83 `, `84 `, `85 `, `86 `, `87 `,
    `88 `, `89 `, `8a `, `8b `, `8c `, `8d `, `8e `, `8f `,
    `90 `, `91 `, `92 `, `93 `, `94 `, `95 `, `96 `, `97 `,
    `98 `, `99 `, `9a `, `9b `, `9c `, `9d `, `9e `, `9f `,
    `a0 `, `a1 `, `a2 `, `a3 `, `a4 `, `a5 `, `a6 `, `a7 `,
    `a8 `, `a9 `, `aa `, `ab `, `ac `, `ad `, `ae `, `af `,
    `b0 `, `b1 `, `b2 `, `b3 `, `b4 `, `b5 `, `b6 `, `b7 `,
    `b8 `, `b9 `, `ba `, `bb `, `bc `, `bd `, `be `, `bf `,
    `c0 `, `c1 `, `c2 `, `c3 `, `c4 `, `c5 `, `c6 `, `c7 `,
    `c8 `, `c9 `, `ca `, `cb `, `cc `, `cd `, `ce `, `cf `,
    `d0 `, `d1 `, `d2 `, `d3 `, `d4 `, `d5 `, `d6 `, `d7 `,
    `d8 `, `d9 `, `da `, `db `, `dc `, `dd `, `de `, `df `,
    `e0 `, `e1 `, `e2 `, `e3 `, `e4 `, `e5 `, `e6 `, `e7 `,
    `e8 `, `e9 `, `ea `, `eb `, `ec `, `ed `, `ee `, `ef `,
    `f0 `, `f1 `, `f2 `, `f3 `, `f4 `, `f5 `, `f6 `, `f7 `,
    `f8 `, `f9 `, `fa `, `fb `, `fc `, `fd `, `fe `, `ff `,
}
 101 
 102 func main() {
 103     args := os.Args[1:]
 104 
 105     if len(args) > 0 {
 106         switch args[0] {
 107         case `-h`, `--h`, `-help`, `--help`:
 108             os.Stdout.WriteString(info[1:])
 109             return
 110         }
 111     }
 112 
 113     if len(args) > 0 && args[0] == `--` {
 114         args = args[1:]
 115     }
 116 
 117     if err := run(args); err != nil && err != errNoMoreOutput {
 118         os.Stdout.WriteString(err.Error())
 119         os.Stdout.WriteString("\n")
 120         os.Exit(1)
 121     }
 122 }
 123 
 124 func run(args []string) error {
 125     w := bufio.NewWriterSize(os.Stdout, 32*1024)
 126     defer w.Flush()
 127 
 128     // with no filenames given, handle stdin and quit
 129     if len(args) == 0 {
 130         return handle(w, os.Stdin, `<stdin>`, -1)
 131     }
 132 
 133     for i, fname := range args {
 134         if i > 0 {
 135             w.WriteString("\n")
 136             w.WriteString("\n")
 137         }
 138 
 139         if err := handleFile(w, fname); err != nil {
 140             return err
 141         }
 142     }
 143 
 144     return nil
 145 }
 146 
 147 func handleFile(w *bufio.Writer, fname string) error {
 148     f, err := os.Open(fname)
 149     if err != nil {
 150         return err
 151     }
 152     defer f.Close()
 153 
 154     stat, err := f.Stat()
 155     if err != nil {
 156         return handle(w, f, fname, -1)
 157     }
 158 
 159     fsize := int(stat.Size())
 160     return handle(w, f, fname, fsize)
 161 }
 162 
 163 // handle shows some messages related to the input and the cmd-line options
 164 // used, and then follows them by the hexadecimal byte-view
 165 func handle(w *bufio.Writer, r io.Reader, name string, size int) error {
 166     owidth := -1
 167     if size > 0 {
 168         w := math.Log10(float64(size))
 169         w = math.Max(math.Ceil(w), 1)
 170         owidth = int(w)
 171     }
 172     if owidth < 0 {
 173         owidth = 8
 174     }
 175 
 176     rc := rendererConfig{
 177         out:         w,
 178         ruler:       makeRuler(perLine),
 179         offsetWidth: owidth,
 180     }
 181 
 182     if size < 0 {
 183         fmt.Fprintf(w, "%s\n", name)
 184     } else {
 185         const fs = "%s  (%s bytes)\n"
 186         fmt.Fprintf(w, fs, name, sprintCommas(size))
 187     }
 188     w.WriteByte('\n')
 189 
 190     // when done, emit a new line in case only part of the last line is
 191     // shown, which means no newline was emitted for it
 192     defer w.WriteString("\n")
 193 
 194     // calling func Read directly can sometimes result in chunks shorter
 195     // than the max chunk-size, even when there are plenty of bytes yet
 196     // to read; to avoid that, use a buffered-reader to explicitly fill
 197     // a slice instead
 198     br := bufio.NewReader(r)
 199 
 200     // to show ASCII up to 1 full chunk ahead, 2 chunks are needed
 201     cur := make([]byte, 0, perLine)
 202     ahead := make([]byte, 0, perLine)
 203 
 204     // the ASCII-panel's wide output requires staying 1 step/chunk behind,
 205     // so to speak
 206     cur, err := fillChunk(cur[:0], perLine, br)
 207     if len(cur) == 0 {
 208         if err == io.EOF {
 209             err = nil
 210         }
 211         return err
 212     }
 213 
 214     for {
 215         ahead, err := fillChunk(ahead[:0], perLine, br)
 216         if err != nil && err != io.EOF {
 217             return err
 218         }
 219 
 220         if len(ahead) == 0 {
 221             // done, maybe except for an extra line of output
 222             break
 223         }
 224 
 225         // show the byte-chunk on its own output line
 226         if err := writeChunk(rc, cur, ahead); err != nil {
 227             return errNoMoreOutput
 228         }
 229 
 230         rc.chunks++
 231         rc.offset += uint(len(cur))
 232         cur = cur[:copy(cur, ahead)]
 233     }
 234 
 235     // don't forget the last output line
 236     if rc.chunks > 0 && len(cur) > 0 {
 237         return writeChunk(rc, cur, nil)
 238     }
 239     return nil
 240 }
 241 
 242 // fillChunk tries to read the number of bytes given, appending them to the
 243 // byte-slice given; this func returns an EOF error only when no bytes are
 244 // read, which somewhat simplifies error-handling for the func caller
 245 func fillChunk(chunk []byte, n int, br *bufio.Reader) ([]byte, error) {
 246     // read buffered-bytes up to the max chunk-size
 247     for i := 0; i < n; i++ {
 248         b, err := br.ReadByte()
 249         if err == nil {
 250             chunk = append(chunk, b)
 251             continue
 252         }
 253 
 254         if err == io.EOF && i > 0 {
 255             return chunk, nil
 256         }
 257         return chunk, err
 258     }
 259 
 260     // got the full byte-count asked for
 261     return chunk, nil
 262 }
 263 
 264 // makeRuler prerenders a ruler-line, used to make the output lines breathe
 265 func makeRuler(numitems int) []byte {
 266     if n := numitems / 4; n > 0 {
 267         var pat = []byte(`           ·`)
 268         return bytes.Repeat(pat, n-1)
 269     }
 270     return nil
 271 }
 272 
// rendererConfig groups several arguments given to any of the rendering
// funcs: func handle makes one for each input, and keeps its offset and
// chunks fields up to date after each line is shown
type rendererConfig struct {
    // out is the writer to send all output to
    out *bufio.Writer

    // offset is the byte-offset of the first byte shown on the current output
    // line: if shown at all, it's shown at the start of the line
    offset uint

    // chunks is the 0-based counter for byte-chunks/lines shown so far, which
    // indirectly keeps track of when it's time to show a `breather` line
    chunks uint

    // ruler is the `ruler` content to show on `breather` lines
    ruler []byte

    // perLine is how many hex-encoded bytes are shown per line
    //
    // NOTE(review): the code visible in this file never sets or reads this
    // field, and uses the package-level constant perLine instead; confirm
    // whether the field is still needed
    perLine uint

    // offsetWidth is the max string-width for the byte-offsets shown at the
    // start of output lines, and determines those values' left-padding
    offsetWidth int
}
 296 
 297 // loopThousandsGroups comes from my lib/package `mathplus`: that's why it
 298 // handles negatives, even though this app only uses it with non-negatives.
 299 func loopThousandsGroups(n int, fn func(i, n int)) {
 300     // 0 doesn't have a log10
 301     if n == 0 {
 302         fn(0, 0)
 303         return
 304     }
 305 
 306     sign := +1
 307     if n < 0 {
 308         n = -n
 309         sign = -1
 310     }
 311 
 312     intLog1000 := int(math.Log10(float64(n)) / 3)
 313     remBase := int(math.Pow10(3 * intLog1000))
 314 
 315     for i := 0; remBase > 0; i++ {
 316         group := (1000 * n) / remBase / 1000
 317         fn(i, sign*group)
 318         // if original number was negative, ensure only first
 319         // group gives a negative input to the callback
 320         sign = +1
 321 
 322         n %= remBase
 323         remBase /= 1000
 324     }
 325 }
 326 
 327 // sprintCommas turns the non-negative number given into a readable string,
 328 // where digits are grouped-separated by commas
 329 func sprintCommas(n int) string {
 330     var sb strings.Builder
 331     loopThousandsGroups(n, func(i, n int) {
 332         if i == 0 {
 333             var buf [4]byte
 334             sb.Write(strconv.AppendInt(buf[:0], int64(n), 10))
 335             return
 336         }
 337         sb.WriteByte(',')
 338         writePad0Sub1000Counter(&sb, uint(n))
 339     })
 340     return sb.String()
 341 }
 342 
 343 // writePad0Sub1000Counter is an alternative to fmt.Fprintf(w, `%03d`, n)
 344 func writePad0Sub1000Counter(w io.Writer, n uint) {
 345     // precondition is 0...999
 346     if n > 999 {
 347         w.Write([]byte(`???`))
 348         return
 349     }
 350 
 351     var buf [3]byte
 352     buf[0] = byte(n/100) + '0'
 353     n %= 100
 354     buf[1] = byte(n/10) + '0'
 355     buf[2] = byte(n%10) + '0'
 356     w.Write(buf[:])
 357 }
 358 
 359 // writeHex is faster than calling fmt.Fprintf(w, `%02x`, b): this
 360 // matters because it's called for every byte of input which isn't
 361 // all 0s or all 1s
 362 func writeHex(w *bufio.Writer, b byte) {
 363     const hexDigits = `0123456789abcdef`
 364     w.WriteByte(hexDigits[b>>4])
 365     w.WriteByte(hexDigits[b&0x0f])
 366 }
 367 
// padding is a number of spaces used in 2 places: func writeASCII adds it
// to the gap emitted right before the first ASCII run on each output line,
// and func writeChunk adds it to the indentation of the breather/ruler lines
const padding = 2
 371 
 372 func writeChunk(rc rendererConfig, first, second []byte) error {
 373     w := rc.out
 374 
 375     // show a ruler every few lines to make eye-scanning easier
 376     if rc.chunks%5 == 0 && rc.chunks > 0 {
 377         writeSpaces(w, padding+rc.offsetWidth)
 378         w.Write(rc.ruler)
 379         w.WriteByte('\n')
 380     }
 381 
 382     // start each line with the byte-offset for the 1st item shown on it
 383     writeCounter(w, rc.offsetWidth, rc.offset)
 384     w.WriteByte(' ')
 385 
 386     for _, b := range first {
 387         // fmt.Fprintf(w, ` %02x`, b)
 388         //
 389         // the commented part above was a performance bottleneck, since
 390         // the slow/generic fmt.Fprintf was called for each input byte
 391         w.WriteByte(' ')
 392         writeHex(w, b)
 393     }
 394 
 395     writeASCII(w, first, second, perLine)
 396     return w.WriteByte('\n')
 397 }
 398 
 399 // writeCounter just emits a left-padded number
 400 func writeCounter(w *bufio.Writer, width int, n uint) {
 401     var buf [32]byte
 402     str := strconv.AppendUint(buf[:0], uint64(n), 10)
 403     writeSpaces(w, width-len(str))
 404     w.Write(str)
 405 }
 406 
 407 // writeSpaces bulk-emits the number of spaces given
 408 func writeSpaces(w *bufio.Writer, n int) {
 409     const spaces = `                                `
 410     for ; n > len(spaces); n -= len(spaces) {
 411         w.WriteString(spaces)
 412     }
 413     if n > 0 {
 414         w.WriteString(spaces[:n])
 415     }
 416 }
 417 
 418 // writeASCII emits the side-panel showing all ASCII runs for each line
 419 func writeASCII(w *bufio.Writer, first, second []byte, perline int) {
 420     // prev keeps track of the previous byte, so spaces are added
 421     // when bytes change from non-visible-ASCII to visible-ASCII
 422     var prev byte
 423 
 424     spaces := padding + 3*(perline-len(first))
 425 
 426     for _, b := range first {
 427         if 32 < b && b < 127 {
 428             if !(32 < prev && prev < 127) {
 429                 writeSpaces(w, spaces)
 430                 spaces = 1
 431             }
 432             w.WriteByte(b)
 433         }
 434         prev = b
 435     }
 436 
 437     for _, b := range second {
 438         if 32 < b && b < 127 {
 439             if !(32 < prev && prev < 127) {
 440                 writeSpaces(w, spaces)
 441                 spaces = 1
 442             }
 443             w.WriteByte(b)
 444         }
 445         prev = b
 446     }
 447 }