/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath jsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
)

// info is the help message: func main skips its leading line-feed when
// showing it
const info = `
jsonl [options...] [filepaths...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`

// main handles the leading command-line options, then converts all inputs
// named by the remaining arguments, exiting with code 1 on failure.
func main() {
	args := os.Args[1:]
	buffered := false

	// only leading arguments are treated as options: the first argument
	// which isn't a known option ends the loop
	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
			continue

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}

		break
	}

	// an optional double-dash separates options from filepaths
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	// flush after each output line, unless buffering was asked for, or
	// unless stdout supports seeking (presumably a regular file, rather
	// than a pipe or a terminal)
	liveLines := !buffered
	if !buffered {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// the handlers return io.EOF when writing the output fails, and that
	// case quits quietly and successfully (presumably to cope with closed
	// pipes)
	if err := run(os.Stdout, args, liveLines); err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run converts all inputs named in args, in order, writing results to w via
// a buffered writer, which is flushed before returning; no args means using
// standard input, which is also what a single dash stands for. Using a dash
// more than once is an error. The first error from any input is returned.
func run(w io.Writer, args []string, liveLines bool) error {
	dashes := 0
	for _, path := range args {
		if path == `-` {
			dashes++
		}
		if dashes > 1 {
			return errors.New(`can't use stdin (dash) more than once`)
		}
	}

	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if len(args) == 0 {
		return handleInput(bw, `-`, liveLines)
	}

	for _, path := range args {
		if err := handleInput(bw, path, liveLines); err != nil {
			return err
		}
	}
	return nil
}

// handleInput simplifies control-flow for func run: it converts either
// standard input (when the path given is a dash) or the file named, making
// sure files opened are closed when done
func handleInput(w *bufio.Writer, path string, liveLines bool) error {
	if path == `-` {
		return jsonl(w, os.Stdin, liveLines)
	}

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return jsonl(w, f, liveLines)
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences:
// bytes below 32, double-quotes, and backslashes are escaped, while all
// other bytes stand for themselves
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}

// jsonl does it all, given a reader and a writer: a top-level array results
// in a line for each of its items, while any other top-level value results
// in a single line. Input with no tokens at all is a success which emits
// nothing. Invalid JSON and trailing data after the top-level value result
// in errors, while failures to write result in io.EOF. When live is true,
// the output is flushed after each item of a top-level array.
func jsonl(w *bufio.Writer, r io.Reader, live bool) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		// return errors.New(`input has no JSON values`)
		return nil
	}
	if err != nil {
		// without this check, invalid input would leave the token nil,
		// which would wrongly emit a null line before failing
		return err
	}

	if t == json.Delim('[') {
		if err := handleTopLevelArray(w, dec, live); err != nil {
			return err
		}
	} else {
		if err := handleToken(w, dec, t); err != nil {
			return err
		}
		if w.WriteByte('\n') != nil {
			return io.EOF
		}
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// handleToken handles recursion for func jsonl: it emits the whole value
// the token given starts, reading more tokens from the decoder when the
// value is an array or an object
func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
	// write errors aren't checked on every call: a bufio.Writer keeps its
	// first write error, and reports it again on later writes and flushes
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return handleArray(w, dec)
		case json.Delim('{'):
			return handleObject(w, dec)
		default:
			return errors.New(`unsupported JSON syntax ` + t.String())
		}

	case nil:
		w.WriteString(`null`)
		return nil

	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil

	case json.Number:
		w.WriteString(t.String())
		return nil

	case string:
		return handleString(w, t)

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// handleTopLevelArray emits each item of a top-level array on its own line,
// assuming the opening square bracket was already read. Input ending before
// the array is closed is an error, same as for nested arrays. Failures to
// write result in io.EOF. When live is true, each line is flushed right
// after it's written.
func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			return errors.New(`end of JSON before array was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}

		if w.WriteByte('\n') != nil {
			return io.EOF
		}

		if live && w.Flush() != nil {
			return io.EOF
		}
	}
}

// handleArray handles arrays for func handleToken, assuming the opening
// square bracket was already read; items are separated by a comma followed
// by a space
func handleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			return errors.New(`end of JSON before array was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return io.EOF
			}
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleObject handles objects for func handleToken, assuming the opening
// curly bracket was already read; key-value pairs are separated by a comma
// followed by a space, while keys are followed by a colon and a space
func handleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			return errors.New(`end of JSON before object was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return io.EOF
			}
		}

		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		if err := handleString(w, k); err != nil {
			return err
		}

		w.WriteString(": ")

		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// without this check, an invalid value would leave the token
			// nil, which would wrongly emit a null in its place
			return err
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleString handles strings for func handleToken, and keys for func
// handleObject: it emits the string given as a double-quoted JSON string,
// escaping its bytes using the table escapedStringBytes
func handleString(w *bufio.Writer, s string) error {
	w.WriteByte('"')
	// loop over all bytes: ranging over a string only gives the index of
	// the first byte of each code point, which would drop the later bytes
	// of all multi-byte UTF-8 sequences
	for i := 0; i < len(s); i++ {
		w.Write(escapedStringBytes[s[i]])
	}
	w.WriteByte('"')
	return nil
}