/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for jsonl: this version has no http(s) support. Even
the unit-tests from the original jsonl are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath jsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
)

// Note: the code is avoiding using the fmt package to save hundreds of
// kilobytes on the resulting executable, which is a noticeable difference.

// info is the help message; its leading newline is skipped when shown
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully
var errNoMoreOutput = errors.New(`no more output`)

// main handles the command-line arguments: a help option shows the help
// message, more than 1 argument is an error, and a single argument is the
// name of the file to read; without arguments, standard input is read
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stderr.WriteString(info[1:])
			return
		}
	}

	if len(os.Args) > 2 {
		const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
	}

	// figure out whether input should come from a named file or from stdin
	path := `-`
	if len(os.Args) > 1 {
		path = os.Args[1]
	}

	err := handleInput(os.Stdout, path)
	if err != nil && err != io.EOF && err != errNoMoreOutput {
		// show the error message in red, then quit as a failure
		os.Stderr.WriteString("\x1b[31m")
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\x1b[0m\n")
		os.Exit(1)
	}
}

// handleInput simplifies control-flow for func main: a path of `-` means
// standard input, while anything else is opened as a named file
func handleInput(w io.Writer, path string) error {
	if path == `-` {
		return convert(w, os.Stdin)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return convert(w, resp.Body)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return convert(w, f)
}

// convert simplifies control-flow for func handleInput: it buffers the
// output, and flushes whatever is still buffered when the conversion ends
func convert(w io.Writer, r io.Reader) error {
	bw := bufio.NewWriter(w)
	// a failed flush is deliberately ignored, like all other output errors
	defer bw.Flush()
	return jsonl(bw, r)
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}

// jsonl does it all, given a reader and a writer: a top-level array is
// emitted as one line per item, while any other top-level value is emitted
// as a single line; empty input results in no output and no error, while
// anything following the first top-level value is an error
func jsonl(w *bufio.Writer, r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		// return errors.New(`input has no JSON values`)
		return nil
	}

	if err != nil {
		// a failed read comes with a nil token, which must not be mistaken
		// for a JSON null and be emitted as an output line
		return err
	}

	if t == json.Delim('[') {
		if err := handleTopLevelArray(w, dec); err != nil {
			return err
		}
	} else {
		if err := handleToken(w, dec, t); err != nil {
			return err
		}
		w.WriteByte('\n')
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// handleToken handles recursion for func jsonl: it emits the value which
// starts with the token given, reading more tokens for arrays and objects
func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return handleArray(w, dec)
		case json.Delim('{'):
			return handleObject(w, dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil:
		w.WriteString(`null`)
		return nil

	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil

	case json.Number:
		// numbers are emitted exactly as they were found in the input
		w.WriteString(t.String())
		return nil

	case string:
		return handleString(w, t)

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// handleTopLevelArray emits each item of the top-level array on its own
// line, until the array closes; input ending before the closing bracket
// isn't reported as an error, and the lines emitted until then are kept
func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}

		if err := w.WriteByte('\n'); err != nil {
			// output can't be written anymore, so quit quietly
			return errNoMoreOutput
		}
	}
}

// handleArray handles arrays for func handleToken, emitting items on the
// same line, each separated by a comma and a space; input ending before
// the closing bracket isn't reported as an error, and the array is closed
// in the output anyway
func handleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte(']')
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				// output can't be written anymore, so quit quietly
				return errNoMoreOutput
			}
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleObject handles objects for func handleToken, emitting key-value
// pairs on the same line, each separated by a comma and a space; input
// ending right where a key or the closing brace is expected isn't reported
// as an error, and the object is closed in the output anyway
func handleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte('}')
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				// output can't be written anymore, so quit quietly
				return errNoMoreOutput
			}
		}

		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		if err := handleString(w, k); err != nil {
			return err
		}

		w.WriteString(": ")

		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}

		if err != nil {
			// a failed read comes with a nil token, which must not be
			// mistaken for a JSON null and be emitted as the value
			return err
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleString handles strings for func handleToken, and keys for func
// handleObject: it emits the string given as a double-quoted JSON string,
// escaping bytes as needed, using the lookup table escapedStringBytes; it
// always returns nil
func handleString(w *bufio.Writer, s string) error {
	w.WriteByte('"')
	// loop over all bytes explicitly: ranging over a string goes rune by
	// rune, which would skip all but the first byte of each multi-byte
	// UTF-8 sequence
	for i := 0; i < len(s); i++ {
		w.Write(escapedStringBytes[s[i]])
	}
	w.WriteByte('"')
	return nil
}