// File: si.go

/*
The MIT License (MIT)

Copyright © 2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

	go build -ldflags "-s -w" -trimpath si.go
*/

package main

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"errors"
	"io"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
)

// info is the help message: its leading newline is there only to make the
// source easier to read, and is skipped when the message is shown
const info = `
si [options...] [filenames/URIs...]


This app (Show It) shows data using your default web browser by auto-opening
tabs. When reading from stdin, the content-type is auto-detected: data are
then sent right away to the browser via localhost, using a random port among
the available ones.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

When given filenames and/or URIs, the browser tabs will point to their paths,
so accidentally reloading them doesn't make their content disappear, unless
those files are actually deleted between reloads.

Dozens of common data-formats are recognized when piped from stdin, such as
  - HTML (web pages)
  - PDF
  - pictures (PNG, JPEG, SVG, WEBP, GIF)
  - audio (AAC, MP3, FLAC, WAV, AU, MIDI)
  - video (MP4, MOV, WEBM, MKV, AVI)
  - JSON
  - generic UTF-8 plain-text

Base64-encoded data URIs are auto-detected and decoded appropriately.

The options are, available both in single and double-dash versions

    -h          show this help message
    -help       show this help message

    -from       declare MIME type, instead of auto-guessing it
    -mime       declare MIME type, instead of auto-guessing it

    -play       autoplay media; useful only for audio/video data
    -autoplay   autoplay media; useful only for audio/video data
`

// main parses the command-line options, then either opens a browser tab for
// each filename/URI given, or serves stdin once when no names were given;
// the app quits with exit-code 1 when any of its inputs fails
func main() {
	cfg, names, help, err := parseArgs(os.Args[1:])
	if err != nil {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}

	if help {
		os.Stdout.WriteString(info[1:])
		return
	}

	nerr := 0

	// show all filenames/URIs given by opening new browser tabs for each
	for _, s := range names {
		s = strings.TrimSpace(s)
		if err := handle(s, cfg); err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			nerr++
		}
	}

	// serve from stdin only if no filenames were given
	if len(names) == 0 {
		if err := handleInput(os.Stdin, cfg); err != nil {
			os.Stderr.WriteString(err.Error())
			os.Stderr.WriteString("\n")
			nerr++
		}
	}

	// quit in failure if any input clearly failed to show up
	if nerr > 0 {
		os.Exit(1)
	}
}

// parseArgs handles the leading options among the command-line arguments
// given (which exclude the app name), and returns the resulting settings,
// along with all arguments left to treat as filenames/URIs.
//
// Option-parsing stops at a `--` argument, which is itself discarded, or at
// the first argument which isn't a recognized option, which is kept as the
// first name. The help result is true as soon as any of the help options is
// found: nothing after that is looked at. An error is returned when the
// last argument is a MIME-type option missing its value.
func parseArgs(args []string) (cfg config, rest []string, help bool, err error) {
	for len(args) > 0 {
		arg := args[0]

		if arg == `--` {
			return cfg, args[1:], false, nil
		}

		// handle options whose values come in the same argument
		if hasAnyPrefix(arg, `-from=`, `--from=`, `-mime=`, `--mime=`) {
			cfg.From = arg[strings.IndexByte(arg, '=')+1:]
			args = args[1:]
			continue
		}

		switch arg {
		case `-h`, `--h`, `-help`, `--help`:
			return cfg, nil, true, nil

		case `-autoplay`, `--autoplay`, `-play`, `--play`:
			cfg.Autoplay = true
			args = args[1:]

		case `-from`, `--from`, `-mime`, `--mime`:
			// the option's value is the argument right after it
			if len(args) < 2 {
				return config{}, nil, false, errors.New(`missing MIME-type argument`)
			}
			cfg.From = args[1]
			args = args[2:]

		default:
			// anything else starts the filenames/URIs: without this case,
			// the loop would never end, as nothing would shrink the args
			return cfg, args, false, nil
		}
	}

	return cfg, nil, false, nil
}

// hasAnyPrefix checks if a string starts with any of the prefixes given
func hasAnyPrefix(s string, prefixes ...string) bool {
	for _, p := range prefixes {
		if strings.HasPrefix(s, p) {
			return true
		}
	}
	return false
}

// handle shows a filename/URI by opening a new browser tab for it
func handle(s string, cfg config) error {
	// open a new browser window for each URI
	if strings.HasPrefix(s, `https://`) || strings.HasPrefix(s, `http://`) {
		return showURI(s)
	}

	// handle data-URIs
	// NOTE(review): the URI is first handed directly to the OS opener, and
	// then also served via localhost; confirm whether both are intended
	if strings.HasPrefix(s, `data:`) && strings.Contains(s, `;base64,`) {
		if err := showURI(s); err != nil {
			return err
		}
		return handleInput(strings.NewReader(s), cfg)
	}

	// the browser needs full paths when showing local files
	fpath, err := filepath.Abs(s)
	if err != nil {
		return err
	}

	// open a new browser tab for each full-path filename
	return showURI(`file:///` + fpath)
}

// showURI tries to open the file/url given using the host operating system's
// defaults, waiting for the opener command to finish
func showURI(what string) error {
	const fph = `url.dll,FileProtocolHandler`

	switch runtime.GOOS {
	case `windows`:
		return exec.Command(`rundll32`, fph, what).Run()
	case `darwin`:
		return exec.Command(`open`, what).Run()
	default:
		return exec.Command(`xdg-open`, what).Run()
	}
}

// handleInput specifically handles stdin and data-URIs: when no MIME type
// was declared via the options, the type is guessed from the first bytes
// read, falling back to plain-text
func handleInput(r io.Reader, cfg config) error {
	// an explicit MIME type disables all auto-detection
	if cfg.From != `` {
		return serveOnce(nil, r, serveConfig{
			ContentType:   cfg.From,
			ContentLength: -1,
			Autoplay:      cfg.Autoplay,
		})
	}

	// before starting the single-request server, try to detect the MIME type
	// by inspecting the first bytes of the stream and matching known filetype
	// starting patterns; a single call to Read may return fewer bytes than
	// are available, so keep reading until the buffer is full or data end
	var buf [64]byte
	n, err := io.ReadFull(r, buf[:])
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return err
	}
	start := buf[:n]

	// handle data-URI-like inputs
	if bytes.HasPrefix(start, []byte(`data:`)) {
		if bytes.Contains(start, []byte(`;base64,`)) {
			return handleDataURI(start, r, cfg)
		}
	}

	// handle regular data, trying to auto-detect its MIME type using
	// its first few bytes
	mime, ok := detectMIME(start)
	if !ok {
		mime = `text/plain`
	}

	// remember to precede the partly-used reader with the starting bytes;
	// give a negative/invalid filesize hint, since stream is single-use
	return serveOnce(start, r, serveConfig{
		ContentType:   mime,
		ContentLength: -1,
		Autoplay:      cfg.Autoplay,
	})
}

// handleDataURI handles data-URIs for func handleInput: start has the first
// bytes of the URI, which must include its whole `data:...;base64,` intro,
// while r has all bytes after those
func handleDataURI(start []byte, r io.Reader, cfg config) error {
	if !bytes.HasPrefix(start, []byte(`data:`)) {
		return errors.New(`invalid data-URI`)
	}

	i := bytes.Index(start, []byte(`;base64,`))
	if i < 0 {
		return errors.New(`invalid data-URI`)
	}

	// force browser to play wave and aiff sounds, instead of
	// showing a useless download-file option
	switch mime := string(start[len(`data:`):i]); mime {
	case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`:
		before := beforeAudio
		if cfg.Autoplay {
			before = beforeAutoplayAudio
		}

		// surround URI-encoded audio data with a web page only having
		// a media player in it: this is necessary for wave and aiff
		// sounds, since web browsers may insist on a useless download
		// option for those media types
		r = io.MultiReader(
			strings.NewReader(before),
			bytes.NewReader(start),
			r,
			strings.NewReader(afterAudio),
		)

		return serveOnce(nil, r, serveConfig{
			ContentType:   `text/html; charset=UTF-8`,
			ContentLength: -1,
			Autoplay:      cfg.Autoplay,
		})

	case `image/bmp`, `image/x-bmp`:
		// surround URI-encoded bitmap data with a web page only having
		// an image element in it: this is necessary for bitmap pictures,
		// since web browsers may insist on a useless download option for
		// that media type
		r = io.MultiReader(
			strings.NewReader(beforeBitmap),
			bytes.NewReader(start),
			r,
			strings.NewReader(afterBitmap),
		)

		return serveOnce(nil, r, serveConfig{
			ContentType:   `text/html; charset=UTF-8`,
			ContentLength: -1,
			Autoplay:      cfg.Autoplay,
		})

	default:
		// decode the payload, and serve it using the URI's own MIME type;
		// data-URIs use the standard base64 alphabet, which has `+` and
		// `/` among its symbols, not the URL-safe one
		start = start[i+len(`;base64,`):]
		r = io.MultiReader(bytes.NewReader(start), r)
		dec := base64.NewDecoder(base64.StdEncoding, r)

		// give a negative/invalid filesize hint, since stream is single-use
		return serveOnce(nil, dec, serveConfig{
			ContentType:   mime,
			ContentLength: -1,
			Autoplay:      cfg.Autoplay,
		})
	}
}

// config is the result of parsing all cmd-line arguments the app was given
type config struct {
	// From is an optional hint for the source data format, and disables
	// type-autodetection when it's non-empty
	From string

	// Autoplay autoplays audio/video data from stdin
	Autoplay bool
}

// descriptions for the command-line options
const (
	fromUsage = `` +
		`declare MIME-type, disabling type-autodetection; ` +
		`use when MIME-type autodetection fails, or to use a ` +
		`charset different from UTF-8`

	mimeUsage     = `alias for option -from`
	playUsage     = `alias for option -autoplay`
	autoplayUsage = `autoplay; useful only when stdin has audio/video data`
)

// serveConfig has all details func serveOnce needs
type serveConfig struct {
	// ContentType is the MIME type of what's being served
	ContentType string

	// ContentLength is the byte-count of what's being served; only
	// positive values are sent to the browser
	ContentLength int

	// Autoplay autoplays audio/video data from stdin
	Autoplay bool
}

// makeDotless is similar to filepath.Ext, except its results never start
// with a dot; strings without any dots in them are returned as given
func makeDotless(s string) string {
	i := strings.LastIndexByte(s, '.')
	if i >= 0 {
		return s[(i + 1):]
	}
	return s
}

// hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix
func hasPrefixByte(b []byte, prefix byte) bool {
	return len(b) > 0 && b[0] == prefix
}

// hasPrefixFold is a case-insensitive bytes.HasPrefix
func hasPrefixFold(s []byte, prefix []byte) bool {
	n := len(prefix)
	return len(s) >= n && bytes.EqualFold(s[:n], prefix)
}

// trimLeadingWhitespace ignores leading space-like symbols: this is useful
// to handle text-based data formats more flexibly
func trimLeadingWhitespace(b []byte) []byte {
	for len(b) > 0 {
		switch b[0] {
		case ' ', '\t', '\n', '\r':
			b = b[1:]
		default:
			return b
		}
	}

	// an empty slice is all that's left, at this point
	return nil
}

const (
	// maxbufsize is the max capacity the HTTP-protocol line-scanners are
	// allowed to reach
	maxbufsize = 128 * 1024

	// beforeAudio starts HTML webpage with just an audio player
	beforeAudio = `<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <link rel="icon" href="data:,">
    <title>wave sound</title>
    <style>
        body { margin: 2rem auto; width: 90vw; }
        audio { margin: auto; width: 100%; }
    </style>
</head>
<body>
    <audio controls autofocus src="`

	// beforeAutoplayAudio starts HTML webpage with just an audio player
	// in autoplay mode
	beforeAutoplayAudio = `<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <link rel="icon" href="data:,">
    <title>wave sound</title>
    <style>
        body { margin: 2rem auto; width: 90vw; }
        audio { margin: auto; width: 100%; }
    </style>
</head>
<body>
    <audio controls autofocus autoplay src="`

	// afterAudio ends HTML webpage with just an audio player
	afterAudio = "\"></audio>\n</body>\n</html>\n"

	// beforeBitmap starts HTML webpage with just an image
	beforeBitmap = `<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <link rel="icon" href="data:,">
    <title>bitmap image</title>
    <style>
        body { margin: 0.5rem auto; width: 90vw; }
        img { margin: auto; width: 100%; }
    </style>
</head>
<body>
    <img src="`

	// afterBitmap ends HTML webpage with just an image
	afterBitmap = "\"></img>\n</body>\n</html>\n"
)

// serveOnce literally serves a single web request and no more: it listens
// on a free localhost port, opens a browser tab pointing to it, and replies
// to the first connection it gets; errors while replying aren't reported
func serveOnce(start []byte, rest io.Reader, cfg serveConfig) error {
	// pick a random port from the currently-available ones
	srv, err := net.Listen(`tcp`, `127.0.0.1:0`)
	if err != nil {
		return err
	}
	defer srv.Close()

	// open a new browser tab for that localhost port
	err = showURI(`http://` + srv.Addr().String())
	if err != nil {
		return err
	}

	// accept first connection: no need for async as the server quits after
	// its first response
	conn, err := srv.Accept()
	if err != nil {
		return err
	}
	defer conn.Close()

	respond(conn, start, rest, cfg)
	return nil
}

// respond reads/ignores all request headers, and then replies with some
// content given, quitting immediately after; the payload is the starting
// bytes given followed by everything the reader has; all writes are
// best-effort, as their errors are deliberately ignored
func respond(conn net.Conn, start []byte, rest io.Reader, cfg serveConfig) {
	sc := bufio.NewScanner(conn)
	sc.Buffer(nil, maxbufsize)
	for sc.Scan() && sc.Text() != `` {
		// ignore all request headers
	}

	switch cfg.ContentType {
	case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`:
		// force browser to play wave and aiff sounds, instead of showing
		// a useless download-file option; audio bytes are sent as a
		// base64-encoded data-URI inside a web page

		writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1)
		// emit opening HTML right until <audio controls src="
		if cfg.Autoplay {
			io.WriteString(conn, beforeAutoplayAudio)
		} else {
			io.WriteString(conn, beforeAudio)
		}
		// emit the data-URI
		writeBase64(conn, cfg.ContentType, start, rest)
		// emit closing HTML after data-URI audio
		io.WriteString(conn, afterAudio)
		return

	case `image/bmp`, `image/x-bmp`:
		// force browser to show bitmap pictures, instead of showing a
		// useless download-file option; picture bytes are sent as a
		// base64-encoded data-URI inside a web page

		writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1)
		// emit opening HTML right until <img src="
		io.WriteString(conn, beforeBitmap)
		// emit the data-URI
		writeBase64(conn, cfg.ContentType, start, rest)
		// emit closing HTML after data-URI image
		io.WriteString(conn, afterBitmap)
		return

	default:
		writePreludeHTTP(conn, cfg.ContentType, cfg.ContentLength)
		// send the starting bytes used to auto-detect the content-type
		conn.Write(start)
		// send rest of payload at light-speed
		io.Copy(conn, rest)
	}
}

// writePreludeHTTP emits the status line and all headers of an HTTP
// response, including the empty line which ends them; the content-length
// header is only emitted when the length given is positive
func writePreludeHTTP(conn net.Conn, contentType string, contentLength int) {
	// respond right after the first empty line, which always follows the
	// request's headers
	io.WriteString(conn, "HTTP/1.1 200 OK\r\n")
	io.WriteString(conn, `Content-Type: `)
	io.WriteString(conn, contentType)
	io.WriteString(conn, "\r\n")
	if contentLength > 0 {
		io.WriteString(conn, `Content-Length: `)
		// the header needs decimal digits: a plain string conversion from
		// an int would result in a single unicode symbol instead
		io.WriteString(conn, strconv.Itoa(contentLength))
		io.WriteString(conn, "\r\n")
	}

	// prevent download-dialog or auto-download from the browser's part
	io.WriteString(conn, "Content-Disposition: inline\r\n")
	// tell browser this is the last request
	io.WriteString(conn, "Connection: close\r\n")
	// payload starts right after an empty line
	io.WriteString(conn, "\r\n")
}

// writeBase64 emits a whole base64-encoded data-URI, using the MIME type
// given, the starting bytes given, and all bytes from the reader given
func writeBase64(conn net.Conn, mimeType string, start []byte, rest io.Reader) {
	// send the data-URI intro
	io.WriteString(conn, `data:`)
	io.WriteString(conn, mimeType)
	io.WriteString(conn, `;base64,`)
	enc := base64.NewEncoder(base64.StdEncoding, conn)
	// base64-encode the starting bytes used to auto-detect the input type
	enc.Write(start)
	// base64-encode the rest of the input
	io.Copy(enc, rest)
	// closing the encoder flushes any partially-encoded trailing bytes
	enc.Close()
}

// nameToMIME tries to match a MIME type to a filename, dotted file extension,
// or a dot-less filetype/extension given
func nameToMIME(fname string) (mimeType string, ok bool) {
	// handle dotless file types and filenames alike
	kind, ok := type2mime[makeDotless(fname)]
	return kind, ok
}

// detectMIME guesses the first appropriate MIME type from the first few
// data bytes given: 24 bytes are enough to detect all supported types
func detectMIME(b []byte) (mimeType string, ok bool) {
	t, ok := detectType(b)
	if ok {
		return t, true
	}
	return ``, false
}

// detectType guesses the first appropriate type for the data given: the
// types in the lookup tables it uses are MIME types
func detectType(b []byte) (dotlessExt string, ok bool) {
	// empty data, so there's no way to detect anything
	if len(b) == 0 {
		return ``, false
	}

	// check for plain-text web-document formats case-insensitively
	kind, ok := checkDoc(b)
	if ok {
		return kind, true
	}

	// check data formats which allow any byte at the start
	kind, ok = checkSpecial(b)
	if ok {
		return kind, true
	}

	// check all other supported data formats: the first byte was already
	// matched by the table lookup itself
	headers := hdrDispatch[b[0]]
	for _, t := range headers {
		if hasPrefixPattern(b[1:], t.Header[1:], cba) {
			return t.Type, true
		}
	}

	// unrecognized data format
	return ``, false
}

// checkDoc tries to guess if the bytes given are the start of HTML, SVG,
// XML, or JSON data
func checkDoc(b []byte) (kind string, ok bool) {
	// ignore leading whitespaces
	b = trimLeadingWhitespace(b)

	// can't detect anything with empty data
	if len(b) == 0 {
		return ``, false
	}

	// handle documents where the HTML doctype declaration, in its exact
	// letter-casing, shows up anywhere among the bytes given, not just
	// at their start
	if bytes.Contains(b, doctypeHTML) {
		return html, true
	}

	// handle HTML/SVG/XML documents
	if hasPrefixByte(b, '<') {
		if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) {
			if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) {
				return svg, true
			}
			return xml, true
		}

		headers := hdrDispatch['<']
		for _, v := range headers {
			if hasPrefixFold(b, v.Header) {
				return v.Type, true
			}
		}
		return ``, false
	}

	// handle JSON with top-level arrays
	if hasPrefixByte(b, '[') {
		// match [", or [[, or [{, ignoring spaces between
		b = trimLeadingWhitespace(b[1:])
		if len(b) > 0 {
			switch b[0] {
			case '"', '[', '{':
				return json, true
			}
		}
		return ``, false
	}

	// handle JSON with top-level objects
	if hasPrefixByte(b, '{') {
		// match {", ignoring spaces between: after {, the only valid syntax
		// which can follow is the opening quote for the expected object-key
		b = trimLeadingWhitespace(b[1:])
		if hasPrefixByte(b, '"') {
			return json, true
		}
		return ``, false
	}

	// checking for a quoted string, any of the JSON keywords, or even a
	// number seems too ambiguous to declare the data valid JSON

	// no web-document format detected
	return ``, false
}

// checkSpecial handles special file-format headers, which should be checked
// before the normal file-type headers, since the first-byte dispatch algo
// doesn't work for these
func checkSpecial(b []byte) (kind string, ok bool) {
	// all special headers have `ftyp` right after their first 4 bytes,
	// which can be anything
	if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 {
		for _, t := range specialHeaders {
			if hasPrefixPattern(b[4:], t.Header[4:], cba) {
				return t.Type, true
			}
		}
	}
	return ``, false
}

// hasPrefixPattern works like bytes.HasPrefix, except it allows for a special
// value to signal any byte is allowed on specific spots
func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool {
	// if the data are shorter than the pattern to match, there's no match
	if len(what) < len(pat) {
		return false
	}

	// use a slice which ensures the pattern length is never exceeded
	what = what[:len(pat)]

	for i, x := range what {
		y := pat[i]
		if x != y && y != wildcard {
			return false
		}
	}
	return true
}

// all the MIME types used/recognized in this package
const (
	aiff    = `audio/aiff`
	au      = `audio/basic`
	avi     = `video/avi`
	avif    = `image/avif`
	bmp     = `image/x-bmp`
	caf     = `audio/x-caf`
	cur     = `image/vnd.microsoft.icon`
	css     = `text/css`
	csv     = `text/csv`
	djvu    = `image/x-djvu`
	elf     = `application/x-elf`
	exe     = `application/vnd.microsoft.portable-executable`
	flac    = `audio/x-flac`
	gif     = `image/gif`
	gz      = `application/gzip`
	heic    = `image/heic`
	htm     = `text/html`
	html    = `text/html`
	ico     = `image/x-icon`
	iso     = `application/octet-stream`
	jpg     = `image/jpeg`
	jpeg    = `image/jpeg`
	js      = `application/javascript`
	json    = `application/json`
	m4a     = `audio/aac`
	m4v     = `video/x-m4v`
	mid     = `audio/midi`
	mov     = `video/quicktime`
	mp4     = `video/mp4`
	mp3     = `audio/mpeg`
	mpg     = `video/mpeg`
	ogg     = `audio/ogg`
	opus    = `audio/opus`
	pdf     = `application/pdf`
	png     = `image/png`
	ps      = `application/postscript`
	psd     = `image/vnd.adobe.photoshop`
	rtf     = `application/rtf`
	sqlite3 = `application/x-sqlite3`
	svg     = `image/svg+xml`
	text    = `text/plain`
	tiff    = `image/tiff`
	tsv     = `text/tsv`
	wasm    = `application/wasm`
	wav     = `audio/x-wav`
	webp    = `image/webp`
	webm    = `video/webm`
	xml     = `application/xml`
	zip     = `application/zip`
	zst     = `application/zstd`
)

// type2mime turns dotless format-names into MIME types
var type2mime = map[string]string{
	`aiff`:    aiff,
	`wav`:     wav,
	`avi`:     avi,
	`jpg`:     jpg,
	`jpeg`:    jpeg,
	`m4a`:     m4a,
	`mp4`:     mp4,
	`m4v`:     m4v,
	`mov`:     mov,
	`png`:     png,
	`avif`:    avif,
	`webp`:    webp,
	`gif`:     gif,
	`tiff`:    tiff,
	`psd`:     psd,
	`flac`:    flac,
	`webm`:    webm,
	`mpg`:     mpg,
	`zip`:     zip,
	`gz`:      gz,
	`zst`:     zst,
	`mp3`:     mp3,
	`opus`:    opus,
	`bmp`:     bmp,
	`mid`:     mid,
	`ogg`:     ogg,
	`html`:    html,
	`htm`:     htm,
	`svg`:     svg,
	`xml`:     xml,
	`rtf`:     rtf,
	`pdf`:     pdf,
	`ps`:      ps,
	`au`:      au,
	`ico`:     ico,
	`cur`:     cur,
	`caf`:     caf,
	`heic`:    heic,
	`sqlite3`: sqlite3,
	`elf`:     elf,
	`exe`:     exe,
	`wasm`:    wasm,
	`iso`:     iso,
	`txt`:     text,
	`css`:     css,
	`csv`:     csv,
	`tsv`:     tsv,
	`js`:      js,
	`json`:    json,
	`geojson`: json,
}

// formatDescriptor ties a file-header pattern to its data-format type
type formatDescriptor struct {
	Header []byte
	Type   string
}

// cba is the any-byte marker for header patterns; it can be anything, as
// long as its value differs from all literal bytes in the header tables:
// failing that, its value causes subtle type-misdetection bugs; for example,
// 0xFD can't be used, since that's the last byte of the zstd header
const cba = 0xFE // 254, which is > 127, the highest-valued ascii symbol

// dash-streamed m4a format
var m4aDash = []byte{
	cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
	000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
}

// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
var specialHeaders = []formatDescriptor{
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
	{m4aDash, m4a},
}

// sqlite3 database format
var sqlite3db = []byte{
	'S', 'Q', 'L', 'i', 't', 'e', ' ',
	'f', 'o', 'r', 'm', 'a', 't', ' ', '3',
	000,
}

// windows-variant bitmap file-header, which is followed by a byte-counter for
// the 40-byte infoheader which follows that
var winbmp = []byte{
	'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40,
}

// deja-vu document format
var djv = []byte{
	'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V',
}

// doctypeHTML is the usual start of HTML documents
var doctypeHTML = []byte{
	'<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l',
}

// hdrDispatch groups format-description-groups by their first byte, thus
// shortening total lookups for some data header: notice how the `ftyp` data
// formats aren't handled here, since these can start with any byte, instead
// of the literal value of the any-byte markers they use; all the entries
// not explicitly indexed below are nil, and thus never match anything
var hdrDispatch = [256][]formatDescriptor{
	0x00: {
		{[]byte{000, 000, 001, 0xBA}, mpg},
		{[]byte{000, 000, 001, 0xB3}, mpg},
		{[]byte{000, 000, 001, 000}, ico},
		{[]byte{000, 000, 002, 000}, cur},
		{[]byte{000, 'a', 's', 'm'}, wasm},
	},
	0x1A: {
		{[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm},
	},
	0x1F: {
		// {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz},
		{[]byte{0x1F, 0x8B, 0x08}, gz},
	},
	'#': {
		{[]byte{'#', '!', ' '}, text},
		{[]byte{'#', '!', '/'}, text},
	},
	'%': {
		{[]byte{'%', 'P', 'D', 'F'}, pdf},
		{[]byte{'%', '!', 'P', 'S'}, ps},
	},
	'(': {
		{[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst},
	},
	'.': {
		{[]byte{'.', 's', 'n', 'd'}, au},
	},
	'8': {
		{[]byte{'8', 'B', 'P', 'S'}, psd},
	},
	'<': {
		// func checkDoc is better for these, since it's case-insensitive
		{doctypeHTML, html},
		{[]byte{'<', 's', 'v', 'g'}, svg},
		{[]byte{'<', 'h', 't', 'm', 'l', '>'}, html},
		{[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html},
		{[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html},
		{[]byte{'<', '?', 'x', 'm', 'l'}, xml},
	},
	'A': {
		{djv, djvu},
	},
	'B': {
		{winbmp, bmp},
	},
	'F': {
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
	},
	'G': {
		{[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif},
		{[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif},
	},
	'I': {
		{[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata
		{[]byte{'I', 'I', '*', 000}, tiff},
	},
	'M': {
		{[]byte{'M', 'M', 000, '*'}, tiff},
		{[]byte{'M', 'T', 'h', 'd'}, mid},
		{[]byte{'M', 'Z', cba, 000, cba, 000}, exe},
		// {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe},
		// {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe},
		// {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe},
	},
	'O': {
		{[]byte{'O', 'g', 'g', 'S'}, ogg},
	},
	'P': {
		{[]byte{'P', 'K', 003, 004}, zip},
	},
	'R': {
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
	},
	'S': {
		{sqlite3db, sqlite3},
	},
	'c': {
		{[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
	},
	'f': {
		{[]byte{'f', 'L', 'a', 'C'}, flac},
	},
	'{': {
		{[]byte{'{', '\\', 'r', 't', 'f'}, rtf},
	},
	0x7F: {
		{[]byte{127, 'E', 'L', 'F'}, elf},
	},
	0x89: {
		{[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
	},
	0xFF: {
		{[]byte{0xFF, 0xD8, 0xFF}, jpg},
		{[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
		{[]byte{0xFF, 0xFB}, mp3},
	},
}