File: si/config.go 1 package main 2 3 import ( 4 "flag" 5 "fmt" 6 "strings" 7 ) 8 9 // config is the result of parsing all cmd-line arguments the app was given 10 type config struct { 11 // From is an optional hint for the source data format, and disables 12 // type-autodetection when it's non-empty 13 From string 14 15 // Autoplay autoplays audio/video data from stdin 16 Autoplay bool 17 } 18 19 const ( 20 fromUsage = `` + 21 `declare MIME-type, disabling type-autodetection; ` + 22 `use when MIME-type autodetection fails, or to use a ` + 23 `charset different from UTF-8` 24 25 mimeUsage = `alias for option -from` 26 playUsage = `alias for option -autoplay` 27 autoplayUsage = `autoplay; useful only when stdin has audio/video data` 28 ) 29 30 // parseFlags is the constructor for type config 31 func parseFlags(usage string) config { 32 flag.Usage = func() { 33 fmt.Fprintf(flag.CommandLine.Output(), "%s\n\nOptions\n\n", usage) 34 flag.PrintDefaults() 35 } 36 37 var cfg config 38 flag.StringVar(&cfg.From, `from`, cfg.From, fromUsage) 39 flag.StringVar(&cfg.From, `mime`, cfg.From, mimeUsage) 40 flag.BoolVar(&cfg.Autoplay, `play`, cfg.Autoplay, playUsage) 41 flag.BoolVar(&cfg.Autoplay, `autoplay`, cfg.Autoplay, autoplayUsage) 42 flag.Parse() 43 44 cfg.From = strings.ToLower(strings.TrimSpace(cfg.From)) 45 if mime, ok := nameToMIME(cfg.From); ok { 46 cfg.From = mime 47 } 48 return cfg 49 } 50 51 // serveConfig has all details func serveOnce needs 52 type serveConfig struct { 53 // ContentType is the MIME type of what's being served 54 ContentType string 55 56 // ContentLength is the byte-count of what's being served; negative 57 // values are ignored 58 ContentLength int 59 60 // Autoplay autoplays audio/video data from stdin 61 Autoplay bool 62 } File: si/data.go 1 package main 2 3 // all the MIME types used/recognized in this package 4 const ( 5 aiff = `audio/aiff` 6 au = `audio/basic` 7 avi = `video/avi` 8 avif = `image/avif` 9 bmp = `image/x-bmp` 10 caf = `audio/x-caf` 11 cur = `image/vnd.microsoft.icon` 12 css = `text/css` 13 csv = `text/csv` 14 djvu = `image/x-djvu` 15 elf = `application/x-elf` 16 exe = `application/vnd.microsoft.portable-executable` 17 flac = `audio/x-flac` 18 gif = `image/gif` 19 gz = `application/gzip` 20 heic = `image/heic` 21 htm = `text/html` 22 html = `text/html` 23 ico = `image/x-icon` 24 iso = `application/octet-stream` 25 jpg = `image/jpeg` 26 jpeg = `image/jpeg` 27 js = `application/javascript` 28 json = `application/json` 29 m4a = `audio/aac` 30 m4v = `video/x-m4v` 31 mid = `audio/midi` 32 mov = `video/quicktime` 33 mp4 = `video/mp4` 34 mp3 = `audio/mpeg` 35 mpg = `video/mpeg` 36 ogg = `audio/ogg` 37 opus = `audio/opus` 38 pdf = `application/pdf` 39 png = `image/png` 40 ps = `application/postscript` 41 psd = `image/vnd.adobe.photoshop` 42 rtf = `application/rtf` 43 sqlite3 = `application/x-sqlite3` 44 svg = `image/svg+xml` 45 text = `text/plain` 46 tiff = `image/tiff` 47 tsv = `text/tsv` 48 wasm = `application/wasm` 49 wav = `audio/x-wav` 50 webp = `image/webp` 51 webm = `video/webm` 52 xml = `application/xml` 53 zip = `application/zip` 54 zst = `application/zstd` 55 ) 56 57 // type2mime turns dotless format-names into MIME types 58 var type2mime = map[string]string{ 59 `aiff`: aiff, 60 `wav`: wav, 61 `avi`: avi, 62 `jpg`: jpg, 63 `jpeg`: jpeg, 64 `m4a`: m4a, 65 `mp4`: mp4, 66 `m4v`: m4v, 67 `mov`: mov, 68 `png`: png, 69 `avif`: avif, 70 `webp`: webp, 71 `gif`: gif, 72 `tiff`: tiff, 73 `psd`: psd, 74 `flac`: flac, 75 `webm`: webm, 76 `mpg`: mpg, 77 `zip`: zip, 78 `gz`: gz, 79 `zst`: zst, 80 `mp3`: mp3, 81 `opus`: opus, 82 `bmp`: bmp, 83 `mid`: mid, 84 `ogg`: ogg, 85 `html`: html, 86 `htm`: htm, 87 `svg`: svg, 88 `xml`: xml, 89 `rtf`: rtf, 90 `pdf`: pdf, 91 `ps`: ps, 92 `au`: au, 93 `ico`: ico, 94 `cur`: cur, 95 `caf`: caf, 96 `heic`: heic, 97 `sqlite3`: sqlite3, 98 `elf`: elf, 99 `exe`: exe, 100 `wasm`: wasm, 101 `iso`: iso, 102 `txt`: text, 103 `css`: css, 104 `csv`: csv, 105 `tsv`: tsv, 106 `js`: js, 107 `json`: json, 108 `geojson`: json, 109 } 110 111 // formatDescriptor ties a file-header pattern to its data-format type 112 type formatDescriptor struct { 113 Header []byte 114 Type string 115 } 116 117 // can be anything: ensure this value differs from all other literal bytes 118 // in the generic-headers table: failing that, its value could cause subtle 119 // type-misdetection bugs 120 const cba = 0xFD // 253, which is > 127, the highest-valued ascii symbol 121 122 // dash-streamed m4a format 123 var m4aDash = []byte{ 124 cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h', 125 000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1', 126 } 127 128 // format markers with leading wildcards, which should be checked before the 129 // normal ones: this is to prevent mismatches with the latter types, even 130 // though you can make probabilistic arguments which suggest these mismatches 131 // should be very unlikely in practice 132 var specialHeaders = []formatDescriptor{ 133 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a}, 134 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a}, 135 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4}, 136 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4}, 137 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v}, 138 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov}, 139 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic}, 140 {[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif}, 141 {m4aDash, m4a}, 142 } 143 144 // sqlite3 database format 145 var sqlite3db = []byte{ 146 'S', 'Q', 'L', 'i', 't', 'e', ' ', 147 'f', 'o', 'r', 'm', 'a', 't', ' ', '3', 148 000, 149 } 150 151 // windows-variant bitmap file-header, which is followed by a byte-counter for 152 // the 40-byte infoheader which follows that 153 var winbmp = []byte{ 154 'B', 'M', cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, 40, 155 } 156 157 // deja-vu document format 158 var djv = []byte{ 159 'A', 'T', '&', 'T', 'F', 'O', 'R', 'M', cba, cba, cba, cba, 'D', 'J', 'V', 160 } 161 162 var doctypeHTML = []byte{ 163 '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l', '>', 164 } 165 166 // hdrDispatch groups format-description-groups by their first byte, thus 167 // shortening total lookups for some data header: notice how the `ftyp` data 168 // formats aren't handled here, since these can start with any byte, instead 169 // of the literal value of the any-byte markers they use 170 var hdrDispatch = [256][]formatDescriptor{ 171 { 172 {[]byte{000, 000, 001, 0xBA}, mpg}, 173 {[]byte{000, 000, 001, 0xB3}, mpg}, 174 {[]byte{000, 000, 001, 000}, ico}, 175 {[]byte{000, 000, 002, 000}, cur}, 176 {[]byte{000, 'a', 's', 'm'}, wasm}, 177 }, // 0 178 nil, // 1 179 nil, // 2 180 nil, // 3 181 nil, // 4 182 nil, // 5 183 nil, // 6 184 nil, // 7 185 nil, // 8 186 nil, // 9 187 nil, // 10 188 nil, // 11 189 nil, // 12 190 nil, // 13 191 nil, // 14 192 nil, // 15 193 nil, // 16 194 nil, // 17 195 nil, // 18 196 nil, // 19 197 nil, // 20 198 nil, // 21 199 nil, // 22 200 nil, // 23 201 nil, // 24 202 nil, // 25 203 { 204 {[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm}, 205 }, // 26 206 nil, // 27 207 nil, // 28 208 nil, // 29 209 nil, // 30 210 { 211 // {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz}, 212 {[]byte{0x1F, 0x8B, 0x08}, gz}, 213 }, // 31 214 nil, // 32 215 nil, // 33 ! 216 nil, // 34 " 217 { 218 {[]byte{'#', '!', ' '}, text}, 219 {[]byte{'#', '!', '/'}, text}, 220 }, // 35 # 221 nil, // 36 $ 222 { 223 {[]byte{'%', 'P', 'D', 'F'}, pdf}, 224 {[]byte{'%', '!', 'P', 'S'}, ps}, 225 }, // 37 % 226 nil, // 38 & 227 nil, // 39 ' 228 { 229 {[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst}, 230 }, // 40 ( 231 nil, // 41 ) 232 nil, // 42 * 233 nil, // 43 + 234 nil, // 44 , 235 nil, // 45 - 236 { 237 {[]byte{'.', 's', 'n', 'd'}, au}, 238 }, // 46 . 239 nil, // 47 / 240 nil, // 48 0 241 nil, // 49 1 242 nil, // 50 2 243 nil, // 51 3 244 nil, // 52 4 245 nil, // 53 5 246 nil, // 54 6 247 nil, // 55 7 248 { 249 {[]byte{'8', 'B', 'P', 'S'}, psd}, 250 }, // 56 8 251 nil, // 57 9 252 nil, // 58 : 253 nil, // 59 ; 254 { 255 // func checkDoc is better for these, since it's case-insensitive 256 {doctypeHTML, html}, 257 {[]byte{'<', 's', 'v', 'g'}, svg}, 258 {[]byte{'<', 'h', 't', 'm', 'l', '>'}, html}, 259 {[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html}, 260 {[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html}, 261 {[]byte{'<', '?', 'x', 'm', 'l'}, xml}, 262 }, // 60 < 263 nil, // 61 = 264 nil, // 62 > 265 nil, // 63 ? 266 nil, // 64 @ 267 { 268 {djv, djvu}, 269 }, // 65 A 270 { 271 {winbmp, bmp}, 272 }, // 66 B 273 nil, // 67 C 274 nil, // 68 D 275 nil, // 69 E 276 { 277 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff}, 278 {[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff}, 279 }, // 70 F 280 { 281 {[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif}, 282 {[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif}, 283 }, // 71 G 284 nil, // 72 H 285 { 286 {[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata 287 {[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata 288 {[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata 289 {[]byte{'I', 'I', '*', 000}, tiff}, 290 }, // 73 I 291 nil, // 74 J 292 nil, // 75 K 293 nil, // 76 L 294 { 295 {[]byte{'M', 'M', 000, '*'}, tiff}, 296 {[]byte{'M', 'T', 'h', 'd'}, mid}, 297 {[]byte{'M', 'Z', cba, 000, cba, 000}, exe}, 298 // {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe}, 299 // {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe}, 300 // {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe}, 301 }, // 77 M 302 nil, // 78 N 303 { 304 {[]byte{'O', 'g', 'g', 'S'}, ogg}, 305 }, // 79 O 306 { 307 {[]byte{'P', 'K', 003, 004}, zip}, 308 }, // 80 P 309 nil, // 81 Q 310 { 311 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp}, 312 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav}, 313 {[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi}, 314 }, // 82 R 315 { 316 {sqlite3db, sqlite3}, 317 }, // 83 S 318 nil, // 84 T 319 nil, // 85 U 320 nil, // 86 V 321 nil, // 87 W 322 nil, // 88 X 323 nil, // 89 Y 324 nil, // 90 Z 325 nil, // 91 [ 326 nil, // 92 \ 327 nil, // 93 ] 328 nil, // 94 ^ 329 nil, // 95 _ 330 nil, // 96 ` 331 nil, // 97 a 332 nil, // 98 b 333 { 334 {[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf}, 335 }, // 99 c 336 nil, // 100 d 337 nil, // 101 e 338 { 339 {[]byte{'f', 'L', 'a', 'C'}, flac}, 340 }, // 102 f 341 nil, // 103 g 342 nil, // 104 h 343 nil, // 105 i 344 nil, // 106 j 345 nil, // 107 k 346 nil, // 108 l 347 nil, // 109 m 348 nil, // 110 n 349 nil, // 111 o 350 nil, // 112 p 351 nil, // 113 q 352 nil, // 114 r 353 nil, // 115 s 354 nil, // 116 t 355 nil, // 117 u 356 nil, // 118 v 357 nil, // 119 w 358 nil, // 120 x 359 nil, // 121 y 360 nil, // 122 z 361 { 362 {[]byte{'{', '\\', 'r', 't', 'f'}, rtf}, 363 }, // 123 { 364 nil, // 124 | 365 nil, // 125 } 366 nil, // 126 367 { 368 {[]byte{127, 'E', 'L', 'F'}, elf}, 369 }, // 127 370 nil, // 128 371 nil, // 129 372 nil, // 130 373 nil, // 131 374 nil, // 132 375 nil, // 133 376 nil, // 134 377 nil, // 135 378 nil, // 136 379 { 380 {[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png}, 381 }, // 137 382 nil, // 138 383 nil, // 139 384 nil, // 140 385 nil, // 141 386 nil, // 142 387 nil, // 143 388 nil, // 144 389 nil, // 145 390 nil, // 146 391 nil, // 147 392 nil, // 148 393 nil, // 149 394 nil, // 150 395 nil, // 151 396 nil, // 152 397 nil, // 153 398 nil, // 154 399 nil, // 155 400 nil, // 156 401 nil, // 157 402 nil, // 158 403 nil, // 159 404 nil, // 160 405 nil, // 161 406 nil, // 162 407 nil, // 163 408 nil, // 164 409 nil, // 165 410 nil, // 166 411 nil, // 167 412 nil, // 168 413 nil, // 169 414 nil, // 170 415 nil, // 171 416 nil, // 172 417 nil, // 173 418 nil, // 174 419 nil, // 175 420 nil, // 176 421 nil, // 177 422 nil, // 178 423 nil, // 179 424 nil, // 180 425 nil, // 181 426 nil, // 182 427 nil, // 183 428 nil, // 184 429 nil, // 185 430 nil, // 186 431 nil, // 187 432 nil, // 188 433 nil, // 189 434 nil, // 190 435 nil, // 191 436 nil, // 192 437 nil, // 193 438 nil, // 194 439 nil, // 195 440 nil, // 196 441 nil, // 197 442 nil, // 198 443 nil, // 199 444 nil, // 200 445 nil, // 201 446 nil, // 202 447 nil, // 203 448 nil, // 204 449 nil, // 205 450 nil, // 206 451 nil, // 207 452 nil, // 208 453 nil, // 209 454 nil, // 210 455 nil, // 211 456 nil, // 212 457 nil, // 213 458 nil, // 214 459 nil, // 215 460 nil, // 216 461 nil, // 217 462 nil, // 218 463 nil, // 219 464 nil, // 220 465 nil, // 221 466 nil, // 222 467 nil, // 223 468 nil, // 224 469 nil, // 225 470 nil, // 226 471 nil, // 227 472 nil, // 228 473 nil, // 229 474 nil, // 230 475 nil, // 231 476 nil, // 232 477 nil, // 233 478 nil, // 234 479 nil, // 235 480 nil, // 236 481 nil, // 237 482 nil, // 238 483 nil, // 239 484 nil, // 240 485 nil, // 241 486 nil, // 242 487 nil, // 243 488 nil, // 244 489 nil, // 245 490 nil, // 246 491 nil, // 247 492 nil, // 248 493 nil, // 249 494 nil, // 250 495 nil, // 251 496 nil, // 252 497 nil, // 253 498 nil, // 254 499 { 500 {[]byte{0xFF, 0xD8, 0xFF}, jpg}, 501 {[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3}, 502 {[]byte{0xFF, 0xFB}, mp3}, 503 }, // 255 504 } File: si/data_test.go 1 package main 2 3 import ( 4 "strconv" 5 "testing" 6 ) 7 8 func TestData(t *testing.T) { 9 t.Run(`could-be-anything constant`, func(t *testing.T) { 10 if len(hdrDispatch[cba]) != 0 { 11 const fs = `chosen constant %d collides with header entries` 12 t.Fatalf(fs, cba) 13 } 14 }) 15 16 for i, v := range hdrDispatch { 17 t.Run(`dispatch @ `+strconv.Itoa(i), func(t *testing.T) { 18 const fs = `expected leading byte to be %d, but got %d instead` 19 for _, e := range v { 20 if e.Header[0] != byte(i) { 21 t.Fatalf(fs, i, e.Header[0]) 22 return 23 } 24 } 25 }) 26 } 27 } File: si/filetypes.go 1 package main 2 3 import "bytes" 4 5 // nameToMIME tries to match a MIME type to a filename, dotted file extension, 6 // or a dot-less filetype/extension given 7 func nameToMIME(fname string) (mimeType string, ok bool) { 8 // handle dotless file types and filenames alike 9 kind, ok := type2mime[makeDotless(fname)] 10 return kind, ok 11 } 12 13 // detectMIME guesses the first appropriate MIME type from the first few 14 // data bytes given: 24 bytes are enough to detect all supported types 15 func detectMIME(b []byte) (mimeType string, ok bool) { 16 t, ok := detectType(b) 17 if ok { 18 return t, true 19 } 20 return ``, false 21 } 22 23 // detectType guesses the first appropriate file type for the data given: 24 // here the type is a a filename extension without the leading dot 25 func detectType(b []byte) (dotlessExt string, ok bool) { 26 // empty data, so there's no way to detect anything 27 if len(b) == 0 { 28 return ``, false 29 } 30 31 // check for plain-text web-document formats case-insensitively 32 kind, ok := checkDoc(b) 33 if ok { 34 return kind, true 35 } 36 37 // check data formats which allow any byte at the start 38 kind, ok = checkSpecial(b) 39 if ok { 40 return kind, true 41 } 42 43 // check all other supported data formats 44 headers := hdrDispatch[b[0]] 45 for _, t := range headers { 46 if hasPrefixPattern(b[1:], t.Header[1:], cba) { 47 return t.Type, true 48 } 49 } 50 51 // unrecognized data format 52 return ``, false 53 } 54 55 // checkDoc tries to guess if the bytes given are the start of HTML, SVG, 56 // XML, or JSON data 57 func checkDoc(b []byte) (kind string, ok bool) { 58 // ignore leading whitespaces 59 b = trimLeadingWhitespace(b) 60 61 // can't detect anything with empty data 62 if len(b) == 0 { 63 return ``, false 64 } 65 66 // handle HTML/SVG/XML documents 67 if hasPrefixByte(b, '<') { 68 if hasPrefixFold(b, []byte{'<', '?', 'x', 'm', 'l'}) { 69 if bytes.Contains(b, []byte{'<', 's', 'v', 'g'}) { 70 return svg, true 71 } 72 return xml, true 73 } 74 75 headers := hdrDispatch['<'] 76 for _, v := range headers { 77 if hasPrefixFold(b, v.Header) { 78 return v.Type, true 79 } 80 } 81 return ``, false 82 } 83 84 // handle JSON with top-level arrays 85 if hasPrefixByte(b, '[') { 86 // match [", or [[, or [{, ignoring spaces between 87 b = trimLeadingWhitespace(b[1:]) 88 if len(b) > 0 { 89 switch b[0] { 90 case '"', '[', '{': 91 return json, true 92 } 93 } 94 return ``, false 95 } 96 97 // handle JSON with top-level objects 98 if hasPrefixByte(b, '{') { 99 // match {", ignoring spaces between: after {, the only valid syntax 100 // which can follow is the opening quote for the expected object-key 101 b = trimLeadingWhitespace(b[1:]) 102 if hasPrefixByte(b, '"') { 103 return json, true 104 } 105 return ``, false 106 } 107 108 // checking for a quoted string, any of the JSON keywords, or even a 109 // number seems too ambiguous to declare the data valid JSON 110 111 // no web-document format detected 112 return ``, false 113 } 114 115 // checkSpecial handles special file-format headers, which should be checked 116 // before the normal file-type headers, since the first-byte dispatch algo 117 // doesn't work for these 118 func checkSpecial(b []byte) (kind string, ok bool) { 119 if len(b) >= 8 && bytes.Index(b, []byte{'f', 't', 'y', 'p'}) == 4 { 120 for _, t := range specialHeaders { 121 if hasPrefixPattern(b[4:], t.Header[4:], cba) { 122 return t.Type, true 123 } 124 } 125 } 126 return ``, false 127 } 128 129 // hasPrefixPattern works like bytes.HasPrefix, except it allows for a special 130 // value to signal any byte is allowed on specific spots 131 func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool { 132 // if the data are shorter than the pattern to match, there's no match 133 if len(what) < len(pat) { 134 return false 135 } 136 137 // use a slice which ensures the pattern length is never exceeded 138 what = what[:len(pat)] 139 140 for i, x := range what { 141 y := pat[i] 142 if x != y && y != wildcard { 143 return false 144 } 145 } 146 return true 147 } File: si/filetypes_test.go 1 package main 2 3 import ( 4 "bytes" 5 "testing" 6 ) 7 8 func TestCheckDoc(t *testing.T) { 9 const ( 10 lf = "\n" 11 crlf = "\r\n" 12 tab = "\t" 13 xmlIntro = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` 14 ) 15 16 tests := []struct { 17 Input string 18 Expected string 19 }{ 20 {``, ``}, 21 {`{"abc":123}`, json}, 22 {`[` + lf + ` {"abc":123}`, json}, 23 {`[` + lf + ` {"abc":123}`, json}, 24 {`[` + crlf + tab + `{"abc":123}`, json}, 25 26 {``, ``}, 27 {`<?xml?>`, xml}, 28 {`<?xml?><records>`, xml}, 29 {`<?xml?>` + lf + `<records>`, xml}, 30 {`<?xml?><svg>`, svg}, 31 {`<?xml?>` + crlf + `<svg>`, svg}, 32 {xmlIntro + lf + `<svg`, svg}, 33 {xmlIntro + crlf + `<svg`, svg}, 34 } 35 36 for _, tc := range tests { 37 t.Run(tc.Input, func(t *testing.T) { 38 res, _ := checkDoc([]byte(tc.Input)) 39 if res != tc.Expected { 40 t.Fatalf(`got %v, expected %v instead`, res, tc.Expected) 41 } 42 }) 43 } 44 } 45 46 func TestHasPrefixPattern(t *testing.T) { 47 var ( 48 data = []byte{ 49 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 50 } 51 pat = []byte{ 52 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 53 } 54 ) 55 56 if !hasPrefixPattern(data, pat, cba) { 57 t.Fatal(`wildcard pattern not working`) 58 } 59 } 60 61 func BenchmarkHasPrefixMatch(b *testing.B) { 62 var ( 63 data = []byte{ 64 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 65 } 66 pat = []byte{ 67 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 68 } 69 ) 70 71 b.ReportAllocs() 72 b.ResetTimer() 73 74 for i := 0; i < b.N; i++ { 75 if !bytes.HasPrefix(data, pat) { 76 b.Fatal(`pattern was specifically chosen to match, but didn't`) 77 } 78 } 79 } 80 81 func BenchmarkHasPrefixPatternMatch(b *testing.B) { 82 var ( 83 data = []byte{ 84 'R', 'I', 'F', 'F', 0xf0, 0xba, 0xc8, 0x2b, 'A', 'V', 'I', ' ', 85 } 86 pat = []byte{ 87 'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' ', 88 } 89 ) 90 91 b.ReportAllocs() 92 b.ResetTimer() 93 94 for i := 0; i < b.N; i++ { 95 if !hasPrefixPattern(data, pat, cba) { 96 b.Fatal(`pattern was specifically chosen to match, but didn't`) 97 } 98 } 99 } File: si/go.mod 1 module si 2 3 go 1.18 File: si/http.go 1 package main 2 3 import ( 4 "bufio" 5 "encoding/base64" 6 "fmt" 7 "io" 8 "net" 9 ) 10 11 const ( 12 // maxbufsize is the max capacity the HTTP-protocol line-scanners are 13 // allowed to reach 14 maxbufsize = 128 * 1024 15 16 // beforeAudio starts HTML webpage with just an audio player 17 beforeAudio = `<!DOCTYPE html> 18 <html> 19 <head> 20 <meta charset="UTF-8"> 21 <link rel="icon" href="data:,"> 22 <title>wave sound</title> 23 <style> 24 body { margin: 2rem auto; width: 90vw; } 25 audio { margin: auto; width: 100%; } 26 </style> 27 </head> 28 <body> 29 <audio controls autofocus src="` 30 31 // beforeAutoplayAudio starts HTML webpage with just an audio player 32 // in autoplay mode 33 beforeAutoplayAudio = `<!DOCTYPE html> 34 <html> 35 <head> 36 <meta charset="UTF-8"> 37 <link rel="icon" href="data:,"> 38 <title>wave sound</title> 39 <style> 40 body { margin: 2rem auto; width: 90vw; } 41 audio { margin: auto; width: 100%; } 42 </style> 43 </head> 44 <body> 45 <audio controls autofocus autoplay src="` 46 47 // afterAudio ends HTML webpage with just an audio player 48 afterAudio = "\"></audio>\n</body>\n</html>\n" 49 50 // beforeBitmap starts HTML webpage with just an image 51 beforeBitmap = `<!DOCTYPE html> 52 <html> 53 <head> 54 <meta charset="UTF-8"> 55 <link rel="icon" href="data:,"> 56 <title>bitmap image</title> 57 <style> 58 body { margin: 0.5rem auto; width: 90vw; } 59 img { margin: auto; width: 100%; } 60 </style> 61 </head> 62 <body> 63 <img src="` 64 65 // afterBitmap ends HTML webpage with just an image 66 afterBitmap = "\"></img>\n</body>\n</html>\n" 67 ) 68 69 // serveOnce literally serves a single web request and no more 70 func serveOnce(start []byte, rest io.Reader, cfg serveConfig) error { 71 // pick a random port from the currently-available ones 72 srv, err := net.Listen(`tcp`, `127.0.0.1:0`) 73 if err != nil { 74 return err 75 } 76 defer srv.Close() 77 78 // open a new browser tab for that localhost port 79 err = showURI(fmt.Sprintf(`http://%s`, srv.Addr().String())) 80 if err != nil { 81 return err 82 } 83 84 // accept first connection: no need for async as the server quits after 85 // its first response 86 conn, err := srv.Accept() 87 if err != nil { 88 return err 89 } 90 defer conn.Close() 91 92 respond(conn, start, rest, cfg) 93 return nil 94 } 95 96 // respond reads/ignores all request headers, and then replies with some 97 // content given, quitting immediately after 98 func respond(conn net.Conn, start []byte, rest io.Reader, cfg serveConfig) { 99 sc := bufio.NewScanner(conn) 100 sc.Buffer(nil, maxbufsize) 101 for sc.Scan() && sc.Text() != `` { 102 // ignore all request headers 103 } 104 105 switch cfg.ContentType { 106 case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`: 107 // force browser to play wave and aiff sounds, instead of showing 108 // a useless download-file option; encode audio bytes as data-URI 109 // in an intermediate buffer 110 111 writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1) 112 // emit opening HTML right until <audio controls src=" 113 if cfg.Autoplay { 114 fmt.Fprint(conn, beforeAutoplayAudio) 115 } else { 116 fmt.Fprint(conn, beforeAudio) 117 } 118 // emit the data-URI 119 writeBase64(conn, cfg.ContentType, start, rest) 120 // emit closing HTML after data-URI audio 121 fmt.Fprint(conn, afterAudio) 122 return 123 124 case `image/bmp`, `image/x-bmp`: 125 // force browser to show bitmap pictures, instead of showing a 126 // useless download-file option; encode picture bytes as data-URI 127 // in an intermediate buffer 128 129 writePreludeHTTP(conn, `text/html; charset=UTF-8`, -1) 130 // emit opening HTML right until <img src=" 131 fmt.Fprint(conn, beforeBitmap) 132 // emit the data-URI 133 writeBase64(conn, cfg.ContentType, start, rest) 134 // emit closing HTML after data-URI image 135 fmt.Fprint(conn, afterBitmap) 136 return 137 138 default: 139 writePreludeHTTP(conn, cfg.ContentType, cfg.ContentLength) 140 // send the starting bytes used to auto-detect the content-type 141 conn.Write(start) 142 // send rest of payload at light-speed 143 io.Copy(conn, rest) 144 } 145 } 146 147 func writePreludeHTTP(conn net.Conn, contentType string, contentLength int) { 148 // respond right after the first empty line, which always follows the 149 // request's headers 150 fmt.Fprint(conn, "HTTP/1.1 200 OK\r\n") 151 fmt.Fprintf(conn, "Content-Type: %s\r\n", contentType) 152 if contentLength > 0 { 153 fmt.Fprintf(conn, "Content-Length: %d\r\n", contentLength) 154 } 155 156 // prevent download-dialog or auto-download from the browser's part 157 fmt.Fprintf(conn, "Content-Disposition: inline\r\n") 158 // tell browser this is the last request 159 fmt.Fprint(conn, "Connection: close\r\n") 160 // payload starts right after an empty line 161 fmt.Fprint(conn, "\r\n") 162 } 163 164 func writeBase64(conn net.Conn, mimeType string, start []byte, rest io.Reader) { 165 // send the data-URI intro 166 fmt.Fprintf(conn, `data:%s;base64,`, mimeType) 167 enc := base64.NewEncoder(base64.StdEncoding, conn) 168 // base64-encode the starting bytes used to auto-detect the input type 169 enc.Write(start) 170 // base64-encode the rest of the input 171 io.Copy(enc, rest) 172 enc.Close() 173 } File: si/info.txt 1 si [filenames/URIs...] 2 3 This app (Show It) shows data using your default web browser by auto-opening 4 tabs. When reading from stdin, the content-type is auto-detected: data are 5 then sent right away to the browser via localhost, using a random port among 6 the available ones. 7 8 The localhost connection is available only until all data are transferred: 9 this means refreshing your browser tab will lose your content, replacing it 10 with a server-not-found message page. 11 12 When given filenames and/or URIs, the browser tabs will point their paths, so 13 accidentally reloading them doesn't make them disappear, unless those files 14 are actually deleted between reloads. 15 16 Dozens of common data-formats are recognized when piped from stdin, such as 17 - HTML (web pages) 18 - PDF 19 - pictures (PNG, JPEG, SVG, WEBP, GIF) 20 - audio (AAC, MP3, FLAC, WAV, AU, MIDI) 21 - video (MP4, MOV, WEBM, MKV, AVI) 22 - JSON 23 - generic UTF-8 plain-text 24 25 Base64-encoded data URIs are auto-detected and decoded appropriately. File: si/main.go 1 package main 2 3 import ( 4 "bytes" 5 "encoding/base64" 6 "errors" 7 "flag" 8 "fmt" 9 "io" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "runtime" 14 "strings" 15 16 _ "embed" 17 ) 18 19 //go:embed info.txt 20 var usage string 21 22 func main() { 23 cfg := parseFlags(usage) 24 narg := flag.NArg() 25 26 // show all filenames/URIs given by opening new browser tabs for each 27 nerr := 0 28 for i := 0; i < narg; i++ { 29 s := strings.TrimSpace(flag.Arg(i)) 30 if err := handle(s, cfg); err != nil { 31 fmt.Fprintln(os.Stderr, err.Error()) 32 nerr++ 33 } 34 } 35 36 // quit in failure if any input clearly failed to show up 37 if nerr > 0 { 38 os.Exit(1) 39 } 40 41 // serve from stdin only if no filenames were given 42 if narg == 0 { 43 if err := handleInput(os.Stdin, cfg); err != nil { 44 fmt.Fprintln(os.Stderr, err.Error()) 45 os.Exit(1) 46 } 47 } 48 } 49 50 // handle shows a filename/URI by operning a new browser tab for it 51 func handle(s string, cfg config) error { 52 // open a new browser window for each URI 53 if strings.HasPrefix(s, `https://`) || strings.HasPrefix(s, `http://`) { 54 return showURI(s) 55 } 56 57 // handle data-URIs 58 if strings.HasPrefix(s, `data:`) && strings.Contains(s, `;base64,`) { 59 if err := showURI(s); err != nil { 60 return err 61 } 62 return handleInput(strings.NewReader(s), cfg) 63 } 64 65 // the browser needs full paths when showing local files 66 fpath, err := filepath.Abs(s) 67 if err != nil { 68 return err 69 } 70 71 // open a new browser tab for each full-path filename 72 return showURI(fmt.Sprintf(`file:///%s`, fpath)) 73 } 74 75 // showURI tries to open the file/url given using the host operating system's 76 // defaults 77 func showURI(what string) error { 78 const fph = `url.dll,FileProtocolHandler` 79 80 switch runtime.GOOS { 81 case `windows`: 82 return exec.Command(`rundll32`, fph, what).Run() 83 case `darwin`: 84 return exec.Command(`open`, what).Run() 85 default: 86 return exec.Command(`xdg-open`, what).Run() 87 } 88 } 89 90 // handleInput specifically handles stdin and data-URIs 91 func handleInput(r io.Reader, cfg config) error { 92 if cfg.From != `` { 93 return serveOnce(nil, r, serveConfig{ 94 ContentType: cfg.From, 95 ContentLength: -1, 96 Autoplay: cfg.Autoplay, 97 }) 98 } 99 100 // before starting the single-request server, try to detect the MIME type 101 // by inspecting the first bytes of the stream and matching known filetype 102 // starting patterns 103 var buf [64]byte 104 n, err := r.Read(buf[:]) 105 if err != nil && err != io.EOF { 106 return err 107 } 108 start := buf[:n] 109 110 // handle data-URI-like inputs 111 if bytes.HasPrefix(start, []byte(`data:`)) { 112 if bytes.Contains(start, []byte(`;base64,`)) { 113 return handleDataURI(start, r, cfg) 114 } 115 } 116 117 // handle regular data, trying to auto-detect its MIME type using 118 // its first few bytes 119 mime, ok := detectMIME(start) 120 if !ok { 121 mime = cfg.From 122 } 123 if mime == `` { 124 mime = `text/plain` 125 } 126 127 // remember to precede the partly-used reader with the starting bytes; 128 // give a negative/invalid filesize hint, since stream is single-use 129 return serveOnce(start, r, serveConfig{ 130 ContentType: mime, 131 ContentLength: -1, 132 Autoplay: cfg.Autoplay, 133 }) 134 } 135 136 // handleDataURI handles data-URIs for func handleInput 137 func handleDataURI(start []byte, r io.Reader, cfg config) error { 138 if !bytes.HasPrefix(start, []byte(`data:`)) { 139 return errors.New(`invalid data-URI`) 140 } 141 142 i := bytes.Index(start, []byte(`;base64,`)) 143 if i < 0 { 144 return errors.New(`invalid data-URI`) 145 } 146 147 // force browser to play wave and aiff sounds, instead of 148 // showing a useless download-file option 149 switch mime := string(start[len(`data:`):i]); mime { 150 case `audio/wav`, `audio/wave`, `audio/x-wav`, `audio/aiff`, `audio/x-aiff`: 151 before := beforeAudio 152 if cfg.Autoplay { 153 before = beforeAutoplayAudio 154 } 155 156 // surround URI-encoded audio data with a web page only having 157 // a media player in it: this is necessary for wave and aiff 158 // sounds, since web browsers may insist on a useless download 159 // option for those media types 160 r = io.MultiReader( 161 strings.NewReader(before), 162 bytes.NewReader(start), 163 r, 164 strings.NewReader(afterAudio), 165 ) 166 167 return serveOnce(nil, r, serveConfig{ 168 ContentType: `text/html; charset=UTF-8`, 169 ContentLength: -1, 170 Autoplay: cfg.Autoplay, 171 }) 172 173 case `image/bmp`, `audio/x-bmp`: 174 // surround URI-encoded bitmap data with a web page only having 175 // an image element in it: this is necessary for bitmap pictures, 176 // since web browsers may insist on a useless download option for 177 // that media type 178 r = io.MultiReader( 179 strings.NewReader(beforeBitmap), 180 bytes.NewReader(start), 181 r, 182 strings.NewReader(afterBitmap), 183 ) 184 185 return serveOnce(nil, r, serveConfig{ 186 ContentType: `text/html; charset=UTF-8`, 187 ContentLength: -1, 188 Autoplay: cfg.Autoplay, 189 }) 190 191 default: 192 start = start[i+len(`;base64,`):] 193 r = io.MultiReader(bytes.NewReader(start), r) 194 dec := base64.NewDecoder(base64.URLEncoding, r) 195 196 // give a negative/invalid filesize hint, since stream is single-use 197 return serveOnce(nil, dec, serveConfig{ 198 ContentType: mime, 199 ContentLength: -1, 200 Autoplay: cfg.Autoplay, 201 }) 202 } 203 } File: si/mit-license.txt 1 The MIT License (MIT) 2 3 Copyright © 2024 pacman64 4 5 Permission is hereby granted, free of charge, to any person obtaining a copy of 6 this software and associated documentation files (the “Software”), to deal 7 in the Software without restriction, including without limitation the rights to 8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 of the Software, and to permit persons to whom the Software is furnished to do 10 so, subject to the following conditions: 11 12 The above copyright notice and this permission notice shall be included in all 13 copies or substantial portions of the Software. 14 15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 SOFTWARE. File: si/strings.go 1 package main 2 3 import ( 4 "bytes" 5 "strings" 6 ) 7 8 // makeDotless is similar to filepath.Ext, except its results never start 9 // with a dot 10 func makeDotless(s string) string { 11 i := strings.LastIndexByte(s, '.') 12 if i >= 0 { 13 return s[(i + 1):] 14 } 15 return s 16 } 17 18 // hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix 19 func hasPrefixByte(b []byte, prefix byte) bool { 20 return len(b) > 0 && b[0] == prefix 21 } 22 23 // hasPrefixFold is a case-insensitive bytes.HasPrefix 24 func hasPrefixFold(s []byte, prefix []byte) bool { 25 n := len(prefix) 26 return len(s) >= n && bytes.EqualFold(s[:n], prefix) 27 } 28 29 // trimLeadingWhitespace ignores leading space-like symbols: this is useful 30 // to handle text-based data formats more flexibly 31 func trimLeadingWhitespace(b []byte) []byte { 32 for len(b) > 0 { 33 switch b[0] { 34 case ' ', '\t', '\n', '\r': 35 b = b[1:] 36 default: 37 return b 38 } 39 } 40 41 // an empty slice is all that's left, at this point 42 return nil 43 } File: si/strings_test.go 1 package main 2 3 import ( 4 "bytes" 5 "testing" 6 ) 7 8 func TestHasPrefixByte(t *testing.T) { 9 var tests = []struct { 10 Data []byte 11 Prefix byte 12 Expected bool 13 }{ 14 {nil, 'x', false}, 15 {[]byte(`x`), 'x', true}, 16 {[]byte(` x`), 'x', false}, 17 {[]byte(`xyz`), 'a', false}, 18 {[]byte(`abcxyz`), 'a', true}, 19 } 20 21 for _, tc := range tests { 22 t.Run(string(tc.Data), func(t *testing.T) { 23 got := hasPrefixByte(tc.Data, tc.Prefix) 24 if got != tc.Expected { 25 const fs = `expected %v, but got %v instead` 26 t.Fatalf(fs, tc.Expected, got) 27 } 28 }) 29 } 30 } 31 32 func TestHasPrefixFold(t *testing.T) { 33 var tests = []struct { 34 Data []byte 35 Prefix []byte 36 Expected bool 37 }{ 38 {[]byte("<!docTYPE html>\n<html>"), []byte(`<!doctype HTML`), true}, 39 } 40 41 for _, tc := range tests { 42 t.Run("", func(t *testing.T) { 43 got := hasPrefixFold(tc.Data, tc.Prefix) 44 if got != tc.Expected { 45 const fs = `expected %v, but got %v instead` 46 t.Fatalf(fs, tc.Expected, got) 47 } 48 }) 49 } 50 } 51 52 func TestTrimLeadingWhitespaces(t *testing.T) { 53 var tests = []struct { 54 Data []byte 55 Expected []byte 56 }{ 57 {[]byte(`abc`), []byte(`abc`)}, 58 {[]byte(" \t"), nil}, 59 {[]byte(" \tabc"), []byte(`abc`)}, 60 {[]byte("\r\nabc"), []byte(`abc`)}, 61 } 62 63 for _, tc := range tests { 64 t.Run("", func(t *testing.T) { 65 got := trimLeadingWhitespace(tc.Data) 66 if !bytes.Equal(got, tc.Expected) { 67 const fs = `expected %#v, but got %#v instead` 68 t.Fatalf(fs, tc.Expected, got) 69 } 70 }) 71 } 72 } File: si/type-headers.txt 1 Sources 2 ======= 3 4 Got the file-type signatures from 5 https://en.wikipedia.org/wiki/List_of_file_signatures 6 https://www.garykessler.net/library/file_sigs.html 7 8 The latter site has info to detect various variants of mp4 files, as well as 9 mov files. 10 11 12 File Signatures 13 =============== 14 15 png 16 89 50 4E 47 0D 0A 1A 0A 17 18 gif 19 47 49 46 38 39 61 20 21 jpg 22 FF D8 FF E0 23 FF D8 FF E1 24 25 wav (riff wave) 26 52 49 46 46 ?? ?? ?? ?? 57 41 56 45 27 28 avi (riff avi) 29 52 49 46 46 ?? ?? ?? ?? 41 56 49 20 30 31 webp (riff webp) 32 52 49 46 46 ?? ?? ?? ?? 57 45 42 50 33 34 mp3 35 FF FB 36 49 44 33 37 38 bmp 39 42 4D 40 41 flac 42 66 4C 61 43 43 44 aiff 45 46 4F 52 4D ?? ?? ?? ?? 41 49 46 46 46 47 webm / mkv 48 1A 45 DF A3 49 50 tiff 51 49 49 2A 00 52 4D 4D 00 2A 53 54 rtf 55 7B 5C 72 74 66 56 57 pdf 58 25 50 44 46 59 60 djvu 61 41 54 26 54 46 4F 52 4D ?? ?? ?? ?? 44 4A 56 62 63 zip 64 50 4B 03 04 65 66 mpg 67 00 00 01 BA 68 00 00 01 B3 69 70 mp4 71 ?? ?? ?? ?? 66 74 79 70 4D 53 4E 56 72 ?? ?? ?? ?? 66 74 79 70 69 73 6F 6D 73 74 m4a 75 ?? ?? ?? ?? 66 74 79 70 4D 34 41 20 76 77 m4v 78 ?? ?? ?? ?? 66 74 79 70 6D 70 34 32 79 80 mov 81 ?? ?? ?? ?? 66 74 79 70 71 74 20 20