/*
The MIT License (MIT)

Copyright © 2020-2025 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for datauri, with the unit-tests omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath datauri.go
*/

package main

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"errors"
	"io"
	"os"
	"strings"
)

// info is the help message: its leading line-feed is skipped when shown
const info = `
datauri [options...] [filenames...]


Encode bytes as data-URIs, auto-detecting the file/data type using the first
few bytes from each data/file stream. When given multiple inputs, the output
will be multiple lines, one for each file given.

Empty files/inputs result in empty lines. A simple dash (-) stands for the
standard-input, which is also used automatically when not given any files.

Data-URIs are base64-encoded text representations of arbitrary data, which
include their payload's MIME-type, and which are directly useable/shareable
in web-browsers as links, despite not looking like normal links/URIs.

Some web-browsers limit the size of handled data-URIs to tens of kilobytes.


Options

    -h, -help, --h, --help    show this help message
`

// errorStyle is the ANSI escape sequence which starts red-colored text
const errorStyle = "\x1b[31m"

// errNoMoreOutput is a dummy error whose message is ignored, and which
// causes the app to quit immediately and successfully
var errNoMoreOutput = errors.New(`no more output`)

// main shows the help message when asked to via the first argument; it
// otherwise encodes all inputs named, showing the first actual error (if
// any) in red on stderr, and quitting with a failure exit-code
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stderr.WriteString(info[1:])
			return
		}
	}

	if err := run(os.Stdout, os.Args[1:]); isActualError(err) {
		os.Stderr.WriteString(errorStyle)
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\x1b[0m\n")
		os.Exit(1)
	}
}

// run emits one data-URI line for each input named in args, using stdin
// when no names are given; it stops at the first error, which it returns
func run(w io.Writer, args []string) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if len(args) == 0 {
		return dataURI(bw, os.Stdin, ``)
	}

	for _, name := range args {
		if err := handleFile(bw, name); err != nil {
			return err
		}
	}
	return nil
}

// handleFile encodes the file named: both an empty name and a single dash
// stand for the standard input
func handleFile(w *bufio.Writer, name string) error {
	if name == `` || name == `-` {
		return dataURI(w, os.Stdin, ``)
	}

	f, err := os.Open(name)
	if err != nil {
		return errors.New(`can't read from file named "` + name + `"`)
	}
	defer f.Close()

	return dataURI(w, f, name)
}

// isActualError is to figure out whether not to ignore an error, and thus
// show it as an error message
func isActualError(err error) bool {
	return err != nil && err != io.EOF && err != errNoMoreOutput
}

// dataURI writes a single line with the data-URI for all bytes read from r,
// auto-detecting the MIME type from the first few bytes; name is only used
// as the prefix of the error message for unrecognized data. Empty inputs
// result in empty lines, as the help message promises. The result is
// errNoMoreOutput when the output can't be written to anymore.
func dataURI(w *bufio.Writer, r io.Reader, name string) error {
	// a single call to Read can return fewer bytes than are available, which
	// is common with pipes: fill the buffer as much as the input allows, so
	// type-detection doesn't depend on how the input happens to be chunked
	var buf [64]byte
	n, err := io.ReadFull(r, buf[:])
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return err
	}
	start := buf[:n]

	if len(start) == 0 {
		return endLine(w)
	}

	// handle regular data, trying to auto-detect its MIME type using
	// its first few bytes
	mime, ok := detectMIME(start)
	if !ok {
		return errors.New(name + `: unknown file type`)
	}

	// write errors on buffered writers are sticky, so any failure from
	// these is reported by the flush which ends the line
	w.WriteString(`data:`)
	w.WriteString(mime)
	w.WriteString(`;base64,`)

	// the bytes used for type-detection are part of the payload too
	r = io.MultiReader(bytes.NewReader(start), r)
	enc := base64.NewEncoder(base64.StdEncoding, w)
	if _, err := io.Copy(enc, r); err != nil {
		return err
	}

	// closing the encoder emits any leftover partial base64 group, and can
	// only fail when the output can't be written to
	if err := enc.Close(); err != nil {
		return errNoMoreOutput
	}
	return endLine(w)
}

// endLine ends the current output line and flushes the writer, so each
// result is available right away: the result is errNoMoreOutput on failure
func endLine(w *bufio.Writer) error {
	w.WriteByte('\n')
	if err := w.Flush(); err != nil {
		return errNoMoreOutput
	}
	return nil
}

// makeDotless is similar to filepath.Ext, except its results never start
// with a dot; strings without dots are returned as given
func makeDotless(s string) string {
	if i := strings.LastIndexByte(s, '.'); i >= 0 {
		return s[i+1:]
	}
	return s
}

// hasPrefixByte is a simpler, single-byte version of bytes.HasPrefix
func hasPrefixByte(b []byte, prefix byte) bool {
	return len(b) > 0 && b[0] == prefix
}

// hasPrefixFold is a case-insensitive bytes.HasPrefix
func hasPrefixFold(s []byte, prefix []byte) bool {
	n := len(prefix)
	return len(s) >= n && bytes.EqualFold(s[:n], prefix)
}

// trimLeadingWhitespace ignores leading space-like symbols: this is useful
// to handle text-based data formats more flexibly
func trimLeadingWhitespace(b []byte) []byte {
	return bytes.TrimLeft(b, " \t\n\r")
}

// nameToMIME tries to match a MIME type to a filename, dotted file extension,
// or a dot-less filetype/extension given
func nameToMIME(fname string) (mimeType string, ok bool) {
	// handle dotless file types and filenames alike
	mimeType, ok = type2mime[makeDotless(fname)]
	return mimeType, ok
}

// detectMIME guesses the first appropriate MIME type from the first few
// data bytes given: 24 bytes are enough to detect all supported types
func detectMIME(b []byte) (mimeType string, ok bool) {
	return detectType(b)
}

// detectType guesses the first appropriate type for the data given: the
// result is the MIME type from the first format-descriptor which matches,
// or an empty string and false when nothing matches
func detectType(b []byte) (dotlessExt string, ok bool) {
	// empty data, so there's no way to detect anything
	if len(b) == 0 {
		return ``, false
	}

	// check for plain-text web-document formats case-insensitively
	if kind, ok := checkDoc(b); ok {
		return kind, true
	}

	// check data formats which allow any byte at the start
	if kind, ok := checkSpecial(b); ok {
		return kind, true
	}

	// check all other supported data formats: the first byte is already
	// matched by the dispatch-table lookup itself
	for _, t := range hdrDispatch[b[0]] {
		if hasPrefixPattern(b[1:], t.Header[1:], cba) {
			return t.Type, true
		}
	}

	// unrecognized data format
	return ``, false
}

// checkDoc tries to guess if the bytes given are the start of HTML, SVG,
// XML, or JSON data
func checkDoc(b []byte) (kind string, ok bool) {
	// ignore leading whitespaces
	b = trimLeadingWhitespace(b)

	// can't detect anything with empty data
	if len(b) == 0 {
		return ``, false
	}

	// handle documents whose (case-sensitive) HTML doctype declaration comes
	// after something else, such as an XML declaration
	if bytes.Contains(b, doctypeHTML) {
		return html, true
	}

	// handle HTML/SVG/XML documents
	if hasPrefixByte(b, '<') {
		if hasPrefixFold(b, []byte(`<?xml`)) {
			if bytes.Contains(b, []byte(`<svg`)) {
				return svg, true
			}
			return xml, true
		}

		for _, v := range hdrDispatch['<'] {
			if hasPrefixFold(b, v.Header) {
				return v.Type, true
			}
		}
		return ``, false
	}

	// handle JSON with top-level arrays
	if hasPrefixByte(b, '[') {
		// match [", or [[, or [{, ignoring spaces between
		b = trimLeadingWhitespace(b[1:])
		if len(b) > 0 {
			switch b[0] {
			case '"', '[', '{':
				return json, true
			}
		}
		return ``, false
	}

	// handle JSON with top-level objects
	if hasPrefixByte(b, '{') {
		// match {", ignoring spaces between: after {, the only valid syntax
		// which can follow is the opening quote for the expected object-key
		b = trimLeadingWhitespace(b[1:])
		if hasPrefixByte(b, '"') {
			return json, true
		}
		return ``, false
	}

	// checking for a quoted string, any of the JSON keywords, or even a
	// number seems too ambiguous to declare the data valid JSON

	// no web-document format detected
	return ``, false
}

// checkSpecial handles special file-format headers, which should be checked
// before the normal file-type headers, since the first-byte dispatch algo
// doesn't work for these
func checkSpecial(b []byte) (kind string, ok bool) {
	// all special headers have `ftyp` right after their first 4 bytes
	if len(b) >= 8 && bytes.Index(b, []byte(`ftyp`)) == 4 {
		for _, t := range specialHeaders {
			if hasPrefixPattern(b[4:], t.Header[4:], cba) {
				return t.Type, true
			}
		}
	}
	return ``, false
}

// hasPrefixPattern works like bytes.HasPrefix, except it allows for a special
// value to signal any byte is allowed on specific spots
func hasPrefixPattern(what []byte, pat []byte, wildcard byte) bool {
	// if the data are shorter than the pattern to match, there's no match
	if len(what) < len(pat) {
		return false
	}

	// use a slice which ensures the pattern length is never exceeded
	what = what[:len(pat)]

	for i, x := range what {
		y := pat[i]
		if x != y && y != wildcard {
			return false
		}
	}
	return true
}

// all the MIME types used/recognized in this package
const (
	aiff    = `audio/aiff`
	au      = `audio/basic`
	avi     = `video/avi`
	avif    = `image/avif`
	bmp     = `image/x-bmp`
	caf     = `audio/x-caf`
	cur     = `image/vnd.microsoft.icon`
	css     = `text/css`
	csv     = `text/csv`
	djvu    = `image/x-djvu`
	elf     = `application/x-elf`
	exe     = `application/vnd.microsoft.portable-executable`
	flac    = `audio/x-flac`
	gif     = `image/gif`
	gz      = `application/gzip`
	heic    = `image/heic`
	htm     = `text/html`
	html    = `text/html`
	ico     = `image/x-icon`
	iso     = `application/octet-stream`
	jpg     = `image/jpeg`
	jpeg    = `image/jpeg`
	js      = `application/javascript`
	json    = `application/json`
	m4a     = `audio/aac`
	m4v     = `video/x-m4v`
	mid     = `audio/midi`
	mov     = `video/quicktime`
	mp4     = `video/mp4`
	mp3     = `audio/mpeg`
	mpg     = `video/mpeg`
	ogg     = `audio/ogg`
	opus    = `audio/opus`
	pdf     = `application/pdf`
	png     = `image/png`
	ps      = `application/postscript`
	psd     = `image/vnd.adobe.photoshop`
	rtf     = `application/rtf`
	sqlite3 = `application/x-sqlite3`
	svg     = `image/svg+xml`
	text    = `text/plain`
	tiff    = `image/tiff`
	tsv     = `text/tsv`
	wasm    = `application/wasm`
	wav     = `audio/x-wav`
	webp    = `image/webp`
	webm    = `video/webm`
	xml     = `application/xml`
	zip     = `application/zip`
	zst     = `application/zstd`
)

// type2mime turns dotless format-names into MIME types
var type2mime = map[string]string{
	`aiff`:    aiff,
	`wav`:     wav,
	`avi`:     avi,
	`jpg`:     jpg,
	`jpeg`:    jpeg,
	`m4a`:     m4a,
	`mp4`:     mp4,
	`m4v`:     m4v,
	`mov`:     mov,
	`png`:     png,
	`avif`:    avif,
	`webp`:    webp,
	`gif`:     gif,
	`tiff`:    tiff,
	`psd`:     psd,
	`flac`:    flac,
	`webm`:    webm,
	`mpg`:     mpg,
	`zip`:     zip,
	`gz`:      gz,
	`zst`:     zst,
	`mp3`:     mp3,
	`opus`:    opus,
	`bmp`:     bmp,
	`mid`:     mid,
	`ogg`:     ogg,
	`html`:    html,
	`htm`:     htm,
	`svg`:     svg,
	`xml`:     xml,
	`rtf`:     rtf,
	`pdf`:     pdf,
	`ps`:      ps,
	`au`:      au,
	`ico`:     ico,
	`cur`:     cur,
	`caf`:     caf,
	`heic`:    heic,
	`sqlite3`: sqlite3,
	`elf`:     elf,
	`exe`:     exe,
	`wasm`:    wasm,
	`iso`:     iso,
	`txt`:     text,
	`css`:     css,
	`csv`:     csv,
	`tsv`:     tsv,
	`js`:      js,
	`json`:    json,
	`geojson`: json,
}

// formatDescriptor ties a file-header pattern to the MIME type of its
// data format
type formatDescriptor struct {
	Header []byte
	Type   string
}

// can be anything: ensure this value differs from all other literal bytes
// in the generic-headers table: failing that, its value could cause subtle
// type-misdetection bugs; in particular, it can't be 0xFD, since that's the
// last literal byte of the zstd header
const cba = 0xFE // 254, which is > 127, the highest-valued ascii symbol

// dash-streamed m4a format
var m4aDash = []byte{
	cba, cba, cba, cba, 'f', 't', 'y', 'p', 'd', 'a', 's', 'h',
	000, 000, 000, 000, 'i', 's', 'o', '6', 'm', 'p', '4', '1',
}

// format markers with leading wildcards, which should be checked before the
// normal ones: this is to prevent mismatches with the latter types, even
// though you can make probabilistic arguments which suggest these mismatches
// should be very unlikely in practice
var specialHeaders = []formatDescriptor{
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', ' '}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', '4', 'A', 000}, m4a},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'M', 'S', 'N', 'V'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'i', 's', 'o', 'm'}, mp4},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'm', 'p', '4', '2'}, m4v},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'q', 't', ' ', ' '}, mov},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'h', 'e', 'i', 'c'}, heic},
	{[]byte{cba, cba, cba, cba, 'f', 't', 'y', 'p', 'a', 'v', 'i', 'f'}, avif},
	{m4aDash, m4a},
}

// sqlite3 database format
var sqlite3db = []byte{
	'S', 'Q', 'L', 'i', 't', 'e', ' ',
	'f', 'o', 'r', 'm', 'a', 't', ' ', '3',
	000,
}

// windows-variant bitmap file-header, which is followed by a byte-counter for
// the 40-byte infoheader which follows that
var winbmp = []byte{
	'B', 'M',
	cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba, cba,
	40,
}

// deja-vu document format
var djv = []byte{
	'A', 'T', '&', 'T', 'F', 'O', 'R', 'M',
	cba, cba, cba, cba,
	'D', 'J', 'V',
}

// doctypeHTML is the start of the doctype declaration of HTML documents
var doctypeHTML = []byte{
	'<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E', ' ', 'h', 't', 'm', 'l',
}

// hdrDispatch groups format-description-groups by their first byte, thus
// shortening total lookups for some data header: notice how the `ftyp` data
// formats aren't handled here, since these can start with any byte, instead
// of the literal value of the any-byte markers they use; each group is keyed
// by the first byte of all its headers, and all unlisted bytes have no
// formats starting with them
var hdrDispatch = [256][]formatDescriptor{
	0x00: {
		{[]byte{000, 000, 001, 0xBA}, mpg},
		{[]byte{000, 000, 001, 0xB3}, mpg},
		{[]byte{000, 000, 001, 000}, ico},
		{[]byte{000, 000, 002, 000}, cur},
		{[]byte{000, 'a', 's', 'm'}, wasm},
	},
	0x1A: {
		{[]byte{0x1A, 0x45, 0xDF, 0xA3}, webm},
	},
	0x1F: {
		// {[]byte{0x1F, 0x8B, 0x08, 0x08}, gz},
		{[]byte{0x1F, 0x8B, 0x08}, gz},
	},
	'#': {
		{[]byte{'#', '!', ' '}, text},
		{[]byte{'#', '!', '/'}, text},
	},
	'%': {
		{[]byte{'%', 'P', 'D', 'F'}, pdf},
		{[]byte{'%', '!', 'P', 'S'}, ps},
	},
	'(': {
		{[]byte{0x28, 0xB5, 0x2F, 0xFD}, zst},
	},
	'.': {
		{[]byte{'.', 's', 'n', 'd'}, au},
	},
	'8': {
		{[]byte{'8', 'B', 'P', 'S'}, psd},
	},
	'<': {
		// func checkDoc is better for these, since it's case-insensitive
		{doctypeHTML, html},
		{[]byte{'<', 's', 'v', 'g'}, svg},
		{[]byte{'<', 'h', 't', 'm', 'l', '>'}, html},
		{[]byte{'<', 'h', 'e', 'a', 'd', '>'}, html},
		{[]byte{'<', 'b', 'o', 'd', 'y', '>'}, html},
		{[]byte{'<', '?', 'x', 'm', 'l'}, xml},
	},
	'A': {
		{djv, djvu},
	},
	'B': {
		{winbmp, bmp},
	},
	'F': {
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'F'}, aiff},
		{[]byte{'F', 'O', 'R', 'M', cba, cba, cba, cba, 'A', 'I', 'F', 'C'}, aiff},
	},
	'G': {
		{[]byte{'G', 'I', 'F', '8', '7', 'a'}, gif},
		{[]byte{'G', 'I', 'F', '8', '9', 'a'}, gif},
	},
	'I': {
		{[]byte{'I', 'D', '3', 2}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 3}, mp3}, // ID3-format metadata
		{[]byte{'I', 'D', '3', 4}, mp3}, // ID3-format metadata
		{[]byte{'I', 'I', '*', 000}, tiff},
	},
	'M': {
		{[]byte{'M', 'M', 000, '*'}, tiff},
		{[]byte{'M', 'T', 'h', 'd'}, mid},
		{[]byte{'M', 'Z', cba, 000, cba, 000}, exe},
		// {[]byte{'M', 'Z', 0x90, 000, 003, 000}, exe},
		// {[]byte{'M', 'Z', 0x78, 000, 001, 000}, exe},
		// {[]byte{'M', 'Z', 'P', 000, 002, 000}, exe},
	},
	'O': {
		{[]byte{'O', 'g', 'g', 'S'}, ogg},
	},
	'P': {
		{[]byte{'P', 'K', 003, 004}, zip},
	},
	'R': {
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'E', 'B', 'P'}, webp},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'W', 'A', 'V', 'E'}, wav},
		{[]byte{'R', 'I', 'F', 'F', cba, cba, cba, cba, 'A', 'V', 'I', ' '}, avi},
	},
	'S': {
		{sqlite3db, sqlite3},
	},
	'c': {
		{[]byte{'c', 'a', 'f', 'f', 000, 001, 000, 000}, caf},
	},
	'f': {
		{[]byte{'f', 'L', 'a', 'C'}, flac},
	},
	'{': {
		{[]byte{'{', '\\', 'r', 't', 'f'}, rtf},
	},
	0x7F: {
		{[]byte{127, 'E', 'L', 'F'}, elf},
	},
	0x89: {
		{[]byte{0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A}, png},
	},
	0xFF: {
		{[]byte{0xFF, 0xD8, 0xFF}, jpg},
		{[]byte{0xFF, 0xF3, 0x48, 0xC4, 0x00}, mp3},
		{[]byte{0xFF, 0xFB}, mp3},
	},
}