/*
The MIT License (MIT)

Copyright (c) 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath jsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
)

// info is the help message shown by the help options; its leading newline
// is skipped when printing
const info = `
jsonl [options...] [filepaths...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`

// main handles the leading command-line options, decides whether output
// lines are flushed as soon as they're ready, and exits with code 1 when
// func run fails with anything other than the io.EOF sentinel
func main() {
	args := os.Args[1:]
	buffered := false

	for len(args) > 0 {
		switch args[0] {
		case `-b`, `--b`, `-buffered`, `--buffered`:
			buffered = true
			args = args[1:]
			continue

		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		}

		// the first non-option argument ends option-handling
		break
	}

	// an explicit double-dash separates options from filepaths
	if len(args) > 0 && args[0] == `--` {
		args = args[1:]
	}

	// when stdout can seek, line-by-line flushing is turned off, even when
	// buffering wasn't explicitly asked for
	liveLines := !buffered
	if liveLines {
		if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
			liveLines = false
		}
	}

	// io.EOF is what the handlers return when writing the output fails:
	// that case quits quietly, without an error message
	if err := run(os.Stdout, args, liveLines); err != nil && err != io.EOF {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// run handles all the inputs given, in order, emitting all results via the
// writer given; a dash means standard input, which is also the input used
// when no paths are given, and which can't be used more than once
func run(w io.Writer, args []string, liveLines bool) error {
	dashes := 0
	for _, path := range args {
		if path == `-` {
			dashes++
		}
		if dashes > 1 {
			return errors.New(`can't use stdin (dash) more than once`)
		}
	}

	bw := bufio.NewWriter(w)
	defer bw.Flush()

	if len(args) == 0 {
		return handleInput(bw, `-`, liveLines)
	}

	for _, path := range args {
		if err := handleInput(bw, path, liveLines); err != nil {
			return err
		}
	}
	return nil
}

// handleInput simplifies control-flow for func run: it handles either
// standard input, when the path is a dash, or the file with the name given
func handleInput(w *bufio.Writer, path string, liveLines bool) error {
	if path == `-` {
		return jsonl(w, os.Stdin, liveLines)
	}

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()
	return jsonl(w, f, liveLines)
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
	{48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
	{56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
	{64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
	{72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
	{88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
	{96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
	{104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
	{112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
	{128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
	{136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
	{144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
	{152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
	{168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
	{176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
	{184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
	{192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
	{208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
	{216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
	{224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
	{232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
	{248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
}

// jsonl does it all, given a reader and a writer: a top-level array results
// in a line for each of its items, while any other top-level value results
// in a single line; empty inputs are valid and result in no lines at all.
//
// Invalid JSON and trailing data after the top-level value result in errors,
// while failing to write the output results in io.EOF, which callers are
// expected to handle as a request to quit quietly.
func jsonl(w *bufio.Writer, r io.Reader, live bool) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		// return errors.New(`input has no JSON values`)
		return nil
	}
	if err != nil {
		// failing right away avoids handling the nil token which comes
		// with errors as if it were a JSON null
		return err
	}

	if t == json.Delim('[') {
		if err := handleTopLevelArray(w, dec, live); err != nil {
			return err
		}
	} else {
		if err := handleToken(w, dec, t); err != nil {
			return err
		}
		if err := w.WriteByte('\n'); err != nil {
			return io.EOF
		}
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// handleToken handles recursion for func jsonl, emitting the whole value
// which starts with the token given
//
// Results from writing aren't checked on each call, since buffered writers
// remember their first failure, and keep reporting it on later writes.
func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return handleArray(w, dec)
		case json.Delim('{'):
			return handleObject(w, dec)
		default:
			return errors.New(`unsupported JSON syntax ` + string(rune(t)))
		}

	case nil:
		w.WriteString(`null`)
		return nil

	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil

	case json.Number:
		w.WriteString(t.String())
		return nil

	case string:
		return handleString(w, t)

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// handleTopLevelArray emits each item from a top-level array as its own
// line, flushing the output after each line when in live mode; it returns
// io.EOF when writing the output fails
func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			// input ended before the array did, so the input is invalid
			return errors.New(`end of JSON before array was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}

		if w.WriteByte('\n') != nil {
			return io.EOF
		}

		if !live {
			continue
		}

		if w.Flush() != nil {
			return io.EOF
		}
	}
}

// handleArray handles arrays for func handleToken, emitting items on the
// same line, separated by a comma and a space
func handleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			return errors.New(`end of JSON before array was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return io.EOF
			}
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleObject handles objects for func handleToken, emitting key-value
// pairs on the same line, separated by a comma and a space
func handleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			return errors.New(`end of JSON before object was closed`)
		}
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return io.EOF
			}
		}

		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		if err := handleString(w, k); err != nil {
			return err
		}

		w.WriteString(": ")

		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// failing right away avoids emitting a null for a value which
			// couldn't even be read
			return err
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleString handles strings for func handleToken, and keys for func
// handleObject: it emits a double-quoted JSON string, escaping only the
// bytes which need it, and returns io.EOF when writing the output fails
func handleString(w *bufio.Writer, s string) error {
	w.WriteByte('"')

	// runs of bytes which need no escaping are emitted with a single call
	start := 0
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b >= ' ' && b != '"' && b != '\\' {
			continue
		}

		w.WriteString(s[start:i])
		w.Write(escapedStringBytes[b])
		start = i + 1
	}
	w.WriteString(s[start:])

	// buffered writers remember their first failure, so checking the last
	// write is enough to detect any failure from the previous ones
	if err := w.WriteByte('"'); err != nil {
		return io.EOF
	}
	return nil
}