/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for si, with the unit-tests omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath si.go
*/

package main

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"errors"
	"flag"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
)

// usage is the help message: its leading newline is skipped by func main
var usage = `
si [filenames/URIs...]


This app (Show It) shows data using your default web browser by auto-opening
tabs. When reading from stdin, the content-type is auto-detected: data are
then sent right away to the browser via localhost, using a random port among
the available ones.

The localhost connection is available only until all data are transferred:
this means refreshing your browser tab will lose your content, replacing it
with a server-not-found message page.

When given filenames and/or URIs, the browser tabs will point their paths, so
accidentally reloading them doesn't make them disappear, unless those files
are actually deleted between reloads.

Dozens of common data-formats are recognized when piped from stdin, such as
- HTML (web pages)
- PDF
- pictures (PNG, JPEG, SVG, WEBP, GIF)
- audio (AAC, MP3, FLAC, WAV, AU, MIDI)
- video (MP4, MOV, WEBM, MKV, AVI)
- JSON
- generic UTF-8 plain-text

Base64-encoded data URIs are auto-detected and decoded appropriately.
`

// htmlUTF8 is the content-type for the webpages wrapping media which
// browsers would otherwise offer to download
const htmlUTF8 = `text/html; charset=UTF-8`

func main() {
	cfg := parseFlags(usage[1:])
	args := flag.Args()

	// show all filenames/URIs given by opening new browser tabs for each
	failed := false
	for _, arg := range args {
		if err := handle(strings.TrimSpace(arg), cfg); err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			failed = true
		}
	}

	// quit in failure if any input clearly failed to show up
	if failed {
		os.Exit(1)
	}

	// serve from stdin only if no filenames were given
	if len(args) == 0 {
		if err := handleInput(os.Stdin, cfg); err != nil {
			fmt.Fprintln(os.Stderr, err.Error())
			os.Exit(1)
		}
	}
}

// handle shows a filename/URI by opening a new browser tab for it
func handle(s string, cfg config) error {
	// open a new browser window for each URI
	if strings.HasPrefix(s, `https://`) || strings.HasPrefix(s, `http://`) {
		return showURI(s)
	}

	// handle data-URIs
	// NOTE(review): the data-URI is both handed to the OS opener and then
	// served via localhost; presumably only one of the two is wanted, so
	// confirm the intent before changing this
	if strings.HasPrefix(s, `data:`) && strings.Contains(s, `;base64,`) {
		if err := showURI(s); err != nil {
			return err
		}
		return handleInput(strings.NewReader(s), cfg)
	}

	// the browser needs full paths when showing local files
	fpath, err := filepath.Abs(s)
	if err != nil {
		return err
	}

	// open a new browser tab for each full-path filename
	return showURI(fmt.Sprintf(`file:///%s`, fpath))
}

// showURI tries to open the file/url given using the host operating system's
// defaults
func showURI(what string) error {
	const fph = `url.dll,FileProtocolHandler`

	switch runtime.GOOS {
	case `windows`:
		return exec.Command(`rundll32`, fph, what).Run()
	case `darwin`:
		return exec.Command(`open`, what).Run()
	default:
		return exec.Command(`xdg-open`, what).Run()
	}
}

// handleInput specifically handles stdin and data-URIs: unless a MIME type
// was explicitly given, the type is guessed from the first bytes read
func handleInput(r io.Reader, cfg config) error {
	if cfg.From != `` {
		return serveOnce(nil, r, serveConfig{
			ContentType:   cfg.From,
			ContentLength: -1,
			Autoplay:      cfg.Autoplay,
		})
	}

	// before starting the single-request server, try to detect the MIME type
	// by inspecting the first bytes of the stream and matching known filetype
	// starting patterns; a single Read call may return fewer bytes than are
	// available, so keep reading until the buffer is full or the input ends
	var buf [64]byte
	n, err := io.ReadFull(r, buf[:])
	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
		return err
	}
	start := buf[:n]

	// handle data-URI-like inputs
	if bytes.HasPrefix(start, []byte(`data:`)) &&
		bytes.Contains(start, []byte(`;base64,`)) {
		return handleDataURI(start, r, cfg)
	}

	// handle regular data, trying to auto-detect its MIME type using
	// its first few bytes, and falling back to plain-text
	mime, ok := detectMIME(start)
	if !ok || mime == `` {
		mime = `text/plain`
	}

	// remember to precede the partly-used reader with the starting bytes;
	// give a negative/invalid filesize hint, since stream is single-use
	return serveOnce(start, r, serveConfig{
		ContentType:   mime,
		ContentLength: -1,
		Autoplay:      cfg.Autoplay,
	})
}

// handleDataURI handles data-URIs for func handleInput: start has the first
// bytes already read from r, and must include the `;base64,` marker
func handleDataURI(start []byte, r io.Reader, cfg config) error {
	body, ctype, err := dataURIBody(start, r, cfg.Autoplay)
	if err != nil {
		return err
	}

	// give a negative/invalid filesize hint, since stream is single-use
	return serveOnce(nil, body, serveConfig{
		ContentType:   ctype,
		ContentLength: -1,
		Autoplay:      cfg.Autoplay,
	})
}

// dataURIBody turns a base64 data-URI into the payload to serve, along with
// its content-type: wave/aiff sounds and bitmap pictures are kept as URIs
// and embedded in a minimal webpage, while all other types are decoded
func dataURIBody(start []byte, r io.Reader, autoplay bool) (io.Reader, string, error) {
	if !bytes.HasPrefix(start, []byte(`data:`)) {
		return nil, ``, errors.New(`invalid data-URI`)
	}

	i := bytes.Index(start, []byte(`;base64,`))
	if i < 0 {
		return nil, ``, errors.New(`invalid data-URI`)
	}
	mime := string(start[len(`data:`):i])

	// surround the still-encoded URI with a web page only having a media
	// element in it, since web browsers may insist on a useless download
	// option for those media types
	if before, after, ok := htmlWrapper(mime, autoplay); ok {
		page := io.MultiReader(
			strings.NewReader(before),
			bytes.NewReader(start),
			r,
			strings.NewReader(after),
		)
		return page, htmlUTF8, nil
	}

	// data-URIs use the standard base64 alphabet, not the URL-safe one
	payload := io.MultiReader(bytes.NewReader(start[i+len(`;base64,`):]), r)
	return base64.NewDecoder(base64.StdEncoding, payload), mime, nil
}

// htmlWrapper finds the HTML which must surround a data-URI for the MIME
// types browsers tend to download instead of showing/playing; the result
// isn't ok when the MIME type given needs no such workaround
func htmlWrapper(mime string, autoplay bool) (before string, after string, ok bool) {
	switch mime {
	case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`:
		if autoplay {
			return beforeAutoplayAudio, afterAudio, true
		}
		return beforeAudio, afterAudio, true

	case `image/bmp`, `image/x-bmp`:
		return beforeBitmap, afterBitmap, true

	default:
		return ``, ``, false
	}
}

// config is the result of parsing all cmd-line arguments the app was given
type config struct {
	// From is an optional hint for the source data format, and disables
	// type-autodetection when it's non-empty
	From string

	// Autoplay autoplays audio/video data from stdin
	Autoplay bool
}

const (
	fromUsage = `` +
		`declare MIME-type, disabling type-autodetection; ` +
		`use when MIME-type autodetection fails, or to use a ` +
		`charset different from UTF-8`

	mimeUsage     = `alias for option -from`
	playUsage     = `alias for option -autoplay`
	autoplayUsage = `autoplay; useful only when stdin has audio/video data`
)

// parseFlags is the constructor for type config: it parses the cmd-line
// options, and turns format names/extensions given into their MIME types
func parseFlags(usage string) config {
	flag.Usage = func() {
		fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage)
		flag.PrintDefaults()
	}

	var cfg config
	flag.StringVar(&cfg.From, `from`, cfg.From, fromUsage)
	flag.StringVar(&cfg.From, `mime`, cfg.From, mimeUsage)
	flag.BoolVar(&cfg.Autoplay, `play`, cfg.Autoplay, playUsage)
	flag.BoolVar(&cfg.Autoplay, `autoplay`, cfg.Autoplay, autoplayUsage)
	flag.Parse()

	cfg.From = strings.ToLower(strings.TrimSpace(cfg.From))
	if mime, ok := nameToMIME(cfg.From); ok {
		cfg.From = mime
	}
	return cfg
}

// serveConfig has all details func serveOnce needs
type serveConfig struct {
	// ContentType is the MIME type of what's being served
	ContentType string

	// ContentLength is the byte-count of what's being served; negative
	// values are ignored
	ContentLength int

	// Autoplay autoplays audio/video data from stdin
	Autoplay bool
}

// makeDotless is similar to filepath.Ext, except its results never start
// with a dot; names without dots are given back unchanged
func makeDotless(s string) string {
	if i := strings.LastIndexByte(s, '.'); i >= 0 {
		return s[i+1:]
	}
	return s
}

// hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix
func hasPrefixByte(b []byte, prefix byte) bool {
	return len(b) > 0 && b[0] == prefix
}

// hasPrefixFold is a case-insensitive bytes.HasPrefix
func hasPrefixFold(s []byte, prefix []byte) bool {
	n := len(prefix)
	return len(s) >= n && bytes.EqualFold(s[:n], prefix)
}

// trimLeadingWhitespace ignores leading space-like symbols: this is useful
// to handle text-based data formats more flexibly
func trimLeadingWhitespace(b []byte) []byte {
	for i, c := range b {
		switch c {
		case ' ', '\t', '\n', '\r':
			// keep skipping
		default:
			return b[i:]
		}
	}

	// an empty slice is all that's left, at this point
	return nil
}

const (
	// maxbufsize is the max capacity the HTTP-protocol line-scanners are
	// allowed to reach
	maxbufsize = 128 * 1024

	// beforeAudio starts HTML webpage with just an audio player
	beforeAudio = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<link rel="icon" href="data:,">
<title>wave sound</title>
<style>
body { margin: 2rem auto; width: 90vw; }
audio { margin: auto; width: 100%; }
</style>
</head>
<body>
<audio controls autofocus src="`

	// beforeAutoplayAudio starts HTML webpage with just an audio player
	// in autoplay mode
	beforeAutoplayAudio = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<link rel="icon" href="data:,">
<title>wave sound</title>
<style>
body { margin: 2rem auto; width: 90vw; }
audio { margin: auto; width: 100%; }
</style>
</head>
<body>
<audio controls autofocus autoplay src="`

	// afterAudio ends HTML webpage with just an audio player
	afterAudio = "\"></audio>\n</body>\n</html>\n"

	// beforeBitmap starts HTML webpage with just an image
	beforeBitmap = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<link rel="icon" href="data:,">
<title>bitmap image</title>
<style>
body { margin: 0.5rem auto; width: 90vw; }
img { margin: auto; width: 100%; }
</style>
</head>
<body>
<img src="`

	// afterBitmap ends HTML webpage with just an image
	afterBitmap = "\"></img>\n</body>\n</html>\n"
)

// serveOnce literally serves a single web request and no more: start has
// the bytes already taken out of rest, and is sent before it
func serveOnce(start []byte, rest io.Reader, cfg serveConfig) error {
	// pick a random port from the currently-available ones
	srv, err := net.Listen(`tcp`, `127.0.0.1:0`)
	if err != nil {
		return err
	}
	defer srv.Close()

	// open a new browser tab for that localhost port
	err = showURI(fmt.Sprintf(`http://%s`, srv.Addr().String()))
	if err != nil {
		return err
	}

	// accept first connection: no need for async as the server quits after
	// its first response
	conn, err := srv.Accept()
	if err != nil {
		return err
	}
	defer conn.Close()

	respond(conn, start, rest, cfg)
	return nil
}

// respond reads/ignores all request headers, and then replies with some
// content given, quitting immediately after; write errors are ignored,
// since nothing useful can be done when the browser hangs up early
func respond(conn net.Conn, start []byte, rest io.Reader, cfg serveConfig) {
	sc := bufio.NewScanner(conn)
	sc.Buffer(nil, maxbufsize)
	for sc.Scan() && sc.Text() != `` {
		// ignore all request headers
	}

	// force browser to play wave/aiff sounds and to show bitmap pictures,
	// instead of showing a useless download-file option: the payload is
	// emitted as a data-URI inside a minimal webpage
	if before, after, ok := htmlWrapper(cfg.ContentType, cfg.Autoplay); ok {
		writePreludeHTTP(conn, htmlUTF8, -1)
		// emit opening HTML right until the src attribute's opening quote
		fmt.Fprint(conn, before)
		// emit the data-URI
		writeBase64(conn, cfg.ContentType, start, rest)
		// emit closing HTML after the data-URI
		fmt.Fprint(conn, after)
		return
	}

	writePreludeHTTP(conn, cfg.ContentType, cfg.ContentLength)
	// send the starting bytes used to auto-detect the content-type
	conn.Write(start)
	// send rest of payload at light-speed
	io.Copy(conn, rest)
}

// writePreludeHTTP emits the status line and all response headers, ending
// with the empty line which precedes the payload; content-lengths which
// aren't positive are left out
func writePreludeHTTP(conn net.Conn, contentType string, contentLength int) {
	// respond right after the first empty line, which always follows the
	// request's headers
	fmt.Fprint(conn, "HTTP/1.1 200 OK\r\n")
	fmt.Fprintf(conn, "Content-Type: %s\r\n", contentType)
	if contentLength > 0 {
		fmt.Fprintf(conn, "Content-Length: %d\r\n", contentLength)
	}

	// prevent download-dialog or auto-download from the browser's part
	fmt.Fprint(conn, "Content-Disposition: inline\r\n")
	// tell browser this is the last request
	fmt.Fprint(conn, "Connection: close\r\n")
	// payload starts right after an empty line
	fmt.Fprint(conn, "\r\n")
}

// writeBase64 emits a whole base64 data-URI of the MIME type given, whose
// payload is the starting bytes followed by everything left in the reader
func writeBase64(conn net.Conn, mimeType string, start []byte, rest io.Reader) {
	// send the data-URI intro
	fmt.Fprintf(conn, `data:%s;base64,`, mimeType)
	enc := base64.NewEncoder(base64.StdEncoding, conn)
	// base64-encode the starting bytes used to auto-detect the input type
	enc.Write(start)
	// base64-encode the rest of the input
	io.Copy(enc, rest)
	// closing flushes any partly-filled final base64 group
	enc.Close()
}

// nameToMIME tries to match a MIME type to a filename, dotted file extension,
// or a dot-less filetype/extension given
func nameToMIME(fname string) (mimeType string, ok bool) {
	// handle dotless file types and filenames alike
	mimeType, ok = type2mime[makeDotless(fname)]
	return mimeType, ok
}

// detectMIME guesses the first appropriate MIME type from the first few
// data bytes given: 24 bytes are enough to detect all supported types
func detectMIME(b []byte) (mimeType string, ok bool) {
	if t, found := detectType(b); found {
		return t, true
	}
	return ``, false
}

// detectType guesses the first appropriate type for the data given: the
// types in the lookup tables used are the MIME-type constants
func detectType(b []byte) (dotlessExt string, ok bool) {
	// empty data, so there's no way to detect anything
	if len(b) == 0 {
		return ``, false
	}

	// check for plain-text web-document formats case-insensitively
	if kind, found := checkDoc(b); found {
		return kind, true
	}

	// check data formats which allow any byte at the start
	if kind, found := checkSpecial(b); found {
		return kind, true
	}

	// check all other supported data formats: the first byte is already
	// matched by the dispatch lookup itself
	for _, t := range hdrDispatch[b[0]] {
		if hasPrefixPattern(b[1:], t.Header[1:], cba) {
			return t.Type, true
		}
	}

	// unrecognized data format
	return ``, false
}

// checkDoc tries to guess if the bytes given are the start of HTML, SVG,
// XML, or JSON data
func checkDoc(b []byte) (kind string, ok bool) {
	// ignore leading whitespaces
	b = trimLeadingWhitespace(b)

	// can't detect anything with empty data
	if len(b) == 0 {
		return ``, false
	}

	// handle documents which have an HTML doctype declaration anywhere
	// among the bytes given, even when that's not at the very start
	if bytes.Contains(b, doctypeHTML) {
		return html, true
	}

	switch b[0] {
	case '<':
		// handle HTML/SVG/XML documents
		if hasPrefixFold(b, []byte(`<?xml`)) {
			if bytes.Contains(b, []byte(`<svg`)) {
				return svg, true
			}
			return xml, true
		}

		for _, v := range hdrDispatch['<'] {
			if hasPrefixFold(b, v.Header) {
				return v.Type, true
			}
		}
		return ``, false

	case '[':
		// handle JSON with top-level arrays: match [", or [[, or [{,
		// ignoring spaces between
		b = trimLeadingWhitespace(b[1:])
		if len(b) > 0 {
			switch b[0] {
			case '"', '[', '{':
				return json, true
			}
		}
		return ``, false

	case '{':
		// handle JSON with top-level objects: match {", ignoring spaces
		// between, since after {, the only valid syntax which can follow
		// is the opening quote for the expected object-key
		if hasPrefixByte(trimLeadingWhitespace(b[1:]), '"') {
			return json, true
		}
		return ``, false
	}

	// checking for a quoted string, any of the JSON keywords, or even a
	// number seems too ambiguous to declare the data valid JSON

	// no web-document format detected
	return ``, false
}

// checkSpecial handles special file-format headers, which should be checked
// before the normal file-type headers, since the first-byte dispatch algo
// doesn't work for these
func checkSpecial(b []byte) (kind string, ok bool) {
	if len(b) < 8 || bytes.Index(b, []byte(`ftyp`)) != 4 {
		return ``, false
	}

	// the first 4 bytes can be anything, so matching starts after them
	for _, t := range specialHeaders {
		if hasPrefixPattern(b[4:], t.Header[4:], cba) {
			return t.Type, true
		}
	}
	return ``, false
}

// hasPrefixPattern works like bytes.HasPrefix, except it allows for a special
// value to signal any byte is allowed on specific spots
func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool {
	// if the data are shorter than the pattern to match, there's no match
	if len(what) < len(pat) {
		return false
	}

	for i, y := range pat {
		if y != wildcard && what[i] != y {
			return false
		}
	}
	return true
}

// all the MIME types used/recognized in this package
const (
	aiff    = `audio/aiff`
	au      = `audio/basic`
	avi     = `video/avi`
	avif    = `image/avif`
	bmp     = `image/x-bmp`
	caf     = `audio/x-caf`
	cur     = `image/vnd.microsoft.icon`
	css     = `text/css`
	csv     = `text/csv`
	djvu    = `image/x-djvu`
	elf     = `application/x-elf`
	exe     = `application/vnd.microsoft.portable-executable`
	flac    = `audio/x-flac`
	gif     = `image/gif`
	gz      = `application/gzip`
	heic    = `image/heic`
	htm     = `text/html`
	html    = `text/html`
	ico     = `image/x-icon`
	iso     = `application/octet-stream`
	jpg     = `image/jpeg`
	jpeg    = `image/jpeg`
	js      = `application/javascript`
	json    = `application/json`
	m4a     = `audio/aac`
	m4v     = `video/x-m4v`
	mid     = `audio/midi`
	mov     = `video/quicktime`
	mp4     = `video/mp4`
	mp3     = `audio/mpeg`
	mpg     = `video/mpeg`
	ogg     = `audio/ogg`
	opus    = `audio/opus`
	pdf     = `application/pdf`
	png     = `image/png`
	ps      = `application/postscript`
	psd     = `image/vnd.adobe.photoshop`
	rtf     = `application/rtf`
	sqlite3 = `application/x-sqlite3`
	svg     = `image/svg+xml`
	text    = `text/plain`
	tiff    = `image/tiff`
	tsv     = `text/tsv`
	wasm    = `application/wasm`
	wav     = `audio/x-wav`
	webp    = `image/webp`
	webm    = `video/webm`
	xml     = `application/xml`
	zip     = `application/zip`
	zst     = `application/zstd`
)

// type2mime turns dotless format-names into MIME types
var type2mime = map[string]string{
	`aiff`:    aiff,
	`wav`:     wav,
	`avi`:     avi,
	`jpg`:     jpg,
	`jpeg`:    jpeg,
	`m4a`:     m4a,
	`mp4`:     mp4,
	`m4v`:     m4v,
	`mov`:     mov,
	`png`:     png,
	`avif`:    avif,
	`webp`:    webp,
	`gif`:     gif,
	`tiff`:    tiff,
	`psd`:     psd,
	`flac`:    flac,
	`webm`:    webm,
	`mpg`:     mpg,
	`zip`:     zip,
	`gz`:      gz,
	`zst`:     zst,
	`mp3`:     mp3,
	`opus`:    opus,
	`bmp`:     bmp,
	`mid`:     mid,
	`ogg`:     ogg,
	`html`:    html,
	`htm`:     htm,
	`svg`:     svg,
	`xml`:     xml,
	`rtf`:     rtf,
	`pdf`:     pdf,
	`ps`:      ps,
	`au`:      au,
	`ico`:     ico,
	`cur`:     cur,
	`caf`:     caf,
	`heic`:    heic,
	`sqlite3`: sqlite3,
	`elf`:     elf,
	`exe`:     exe,
	`wasm`:    wasm,
	`iso`:     iso,
	`txt`:     text,
	`css`:     css,
	`csv`:     csv,
	`tsv`:     tsv,
	`js`:      js,
	`json`:    json,
	`geojson`: json,
}

// formatDescriptor ties a file-header pattern to its data-format type
type formatDescriptor struct {
	Header []byte
	Type   string
}

// cba (can be anything) is the wildcard for header patterns: this value
// must differ from all literal bytes in the header tables, since failing
// that causes subtle type-misdetection bugs; for example, 0xFD can't be
// used, as it's the last byte of the zstd magic number
const cba = 0xFE // 254, which is > 127, the highest-valued ascii symbol

// dash-streamed m4a format
var m4aDash = []byte{
	cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
	0x00, 0x00, 0x00, 0x00, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
}

// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
var specialHeaders = []formatDescriptor{
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 0x00}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
	{m4aDash, m4a},
}

// sqlite3 database format
var sqlite3db = []byte{
	'S', 'Q', 'L', 'i', 't', 'e', ' ',
	'f', 'o', 'r', 'm', 'a', 't', ' ', '3',
	0x00,
}

// windows-variant bitmap file-header, which is followed by a byte-counter for
// the 40-byte infoheader which follows that
var winbmp = []byte{
	'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40,
}

// deja-vu document format
var djv = []byte{
	'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V',
}

// doctypeHTML is the start of an HTML doctype declaration
var doctypeHTML = []byte{
	'<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l',
}

// hdrDispatch groups format-description-groups by their first byte, thus
// shortening total lookups for some data header: notice how the `ftyp` data
// formats aren't handled here, since these can start with any byte, instead
// of the literal value of the any-byte markers they use; all entries not
// explicitly indexed are nil, so they never match anything
var hdrDispatch = [256][]formatDescriptor{
	0x00: {
		{[]byte{0x00, 0x00, 0x01, 0xBA}, mpg},
		{[]byte{0x00, 0x00, 0x01, 0xB3}, mpg},
		{[]byte{0x00, 0x00, 0x01, 0x00}, ico},
		{[]byte{0x00, 0x00, 0x02, 0x00}, cur},
		{[]byte{0x00, 'a', 's', 'm'}, wasm},
	},
	0x1A: {
		{[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm},
	},
	0x1F: {
		{[]byte{0x1F, 0x8B, 0x08}, gz},
	},
	'#': {
		{[]byte{'#', '!', ' '}, text},
		{[]byte{'#', '!', '/'}, text},
	},
	'%': {
		{[]byte{'%', 'P', 'D', 'F'}, pdf},
		{[]byte{'%', '!', 'P', 'S'}, ps},
	},
	'(': {
		{[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst},
	},
	'.': {
		{[]byte{'.', 's', 'n', 'd'}, au},
	},
	'8': {
		{[]byte{'8', 'B', 'P', 'S'}, psd},
	},
	'<': {
		// func checkDoc is better for these, since it's case-insensitive
		{doctypeHTML, html},
		{[]byte{'<', 's', 'v', 'g'}, svg},
		{[]byte{'<', 'h', 't', 'm', 'l', '>'}, html},
		{[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html},
		{[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html},
		{[]byte{'<', '?', 'x', 'm', 'l'}, xml},
	},
	'A': {
		{djv, djvu},
	},
	'B': {
		{winbmp, bmp},
	},
	'F': {
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
	},
	'G': {
		{[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif},
		{[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif},
	},
	'I': {
		{[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata
		{[]byte{'I', 'I', '*', 0x00}, tiff},
	},
	'M': {
		{[]byte{'M', 'M', 0x00, '*'}, tiff},
		{[]byte{'M', 'T', 'h', 'd'}, mid},
		{[]byte{'M', 'Z', cba, 0x00, cba, 0x00}, exe},
	},
	'O': {
		{[]byte{'O', 'g', 'g', 'S'}, ogg},
	},
	'P': {
		{[]byte{'P', 'K', 0x03, 0x04}, zip},
	},
	'R': {
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
	},
	'S': {
		{sqlite3db, sqlite3},
	},
	'c': {
		{[]byte{'c', 'a', 'f', 'f', 0x00, 0x01, 0x00, 0x00}, caf},
	},
	'f': {
		{[]byte{'f', 'L', 'a', 'C'}, flac},
	},
	'{': {
		{[]byte{'{', '\\', 'r', 't', 'f'}, rtf},
	},
	0x7F: {
		{[]byte{0x7F, 'E', 'L', 'F'}, elf},
	},
	0x89: {
		{[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
	},
	0xFF: {
		{[]byte{0xFF, 0xD8, 0xFF}, jpg},
		{[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
		{[]byte{0xFF, 0xFB}, mp3},
	},
}