/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath jsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
)

// Note: the code is avoiding using the fmt package to save hundreds of
// kilobytes on the resulting executable, which is a noticeable difference.

// info is the help message: func main skips its leading line-feed when
// showing it
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully
var errNoMoreOutput = errors.New(`no more output`)

// main shows the help message when asked to, rejects multiple inputs, and
// otherwise converts either the file named by the only argument, or the
// standard input when no argument (or a single dash) is given
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}
	}

	if len(os.Args) > 2 {
		os.Stderr.WriteString("multiple inputs not allowed\n")
		os.Exit(1)
	}

	// figure out whether input should come from a named file or from stdin
	path := `-`
	if len(os.Args) > 1 {
		path = os.Args[1]
	}

	err := handleInput(os.Stdout, path)
	if err == nil || errors.Is(err, io.EOF) || errors.Is(err, errNoMoreOutput) {
		// end-of-input and no-more-output aren't failures
		return
	}

	os.Stderr.WriteString(err.Error())
	os.Stderr.WriteString("\n")
	os.Exit(1)
}

// handleInput simplifies control-flow for func main: a path which is a
// single dash means standard input, anything else is a file to open
func handleInput(w io.Writer, path string) error {
	if path == `-` {
		return convert(w, os.Stdin)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return convert(w, resp.Body)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return convert(w, f)
}

// convert simplifies control-flow for func handleInput: it buffers the
// output, and ensures whatever is still buffered is flushed at the end
func convert(w io.Writer, r io.Reader) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()
	return jsonl(bw, r)
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences:
// control bytes, double-quotes, and backslashes are escaped, while all other
// bytes stand for themselves
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}

// jsonl does it all, given a reader and a writer: a top-level array results
// in a line for each of its items, while any other top-level value results
// in a single line; an input with no tokens at all results in no output and
// no error, while anything following the first top-level value is an error
func jsonl(w *bufio.Writer, r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		// return errors.New(`input has no JSON values`)
		return nil
	}

	if err != nil {
		// without this check, the nil token coming along with the error
		// would wrongly be emitted as a `null` line
		return err
	}

	if t == json.Delim('[') {
		if err := handleTopLevelArray(w, dec); err != nil {
			return err
		}
	} else {
		if err := handleToken(w, dec, t); err != nil {
			return err
		}
		w.WriteByte('\n')
	}

	_, err = dec.Token()
	switch err {
	case io.EOF:
		// input is over, so it's a success
		return nil

	case nil:
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)

	default:
		// any other error, perhaps some invalid-JSON-syntax-type error
		return err
	}
}

// handleToken handles recursion for func jsonl: it emits the value starting
// with the token given, reading more tokens when that value is an array or
// an object
func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return handleArray(w, dec)
		case json.Delim('{'):
			return handleObject(w, dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil:
		w.WriteString(`null`)
		return nil

	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil

	case json.Number:
		// numbers are emitted exactly as the decoder gave them
		w.WriteString(t.String())
		return nil

	case string:
		return handleString(w, t)

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// handleTopLevelArray emits each item of the top-level array on its own
// line, flushing after each line; the opening bracket is expected to have
// been read already, and an end-of-input is treated like the array's end
func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}

		w.WriteByte('\n')
		if err := w.Flush(); err != nil {
			// a write error may be the consequence of stdout being closed,
			// perhaps by another app along a pipe
			return errNoMoreOutput
		}
	}
}

// handleArray handles arrays for func handleToken: items are emitted on the
// same line, separated by a comma and a space; the opening bracket is
// expected to have been read already, and an end-of-input is treated like
// the array's end
func handleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte(']')
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return errNoMoreOutput
			}
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleObject handles objects for func handleToken: key-value pairs are
// emitted on the same line, separated by a comma and a space; the opening
// brace is expected to have been read already, and an end-of-input where a
// key could start is treated like the object's end
func handleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			w.WriteByte('}')
			return nil
		}

		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return errNoMoreOutput
			}
		}

		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		if err := handleString(w, k); err != nil {
			return err
		}

		w.WriteString(": ")

		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}

		if err != nil {
			// without this check, the nil token coming along with the error
			// would wrongly be emitted as a `null` value
			return err
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleString handles strings for func handleToken, and keys for func
// handleObject: it emits the string given surrounded by double-quotes, each
// of its bytes turned into whatever table escapedStringBytes has for it;
// the result is always nil
func handleString(w *bufio.Writer, s string) error {
	w.WriteByte('"')
	// loop over bytes explicitly: ranging over a string only visits the
	// index where each rune starts, which would drop all continuation
	// bytes from multi-byte UTF-8 sequences
	for i := 0; i < len(s); i++ {
		w.Write(escapedStringBytes[s[i]])
	}
	w.WriteByte('"')
	return nil
}