File: jsonl.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 To compile a smaller-sized command-line app, you can use the `go` command as 27 follows: 28 29 go build -ldflags "-s -w" -trimpath jsonl.go 30 */ 31 32 package main 33 34 import ( 35 "bufio" 36 "encoding/json" 37 "errors" 38 "io" 39 "os" 40 ) 41 42 // Note: the code is avoiding using the fmt package to save hundreds of 43 // kilobytes on the resulting executable, which is a noticeable difference. 44 45 const info = ` 46 jsonl [options...] [filepath...] 47 48 JSON Lines turns valid JSON-input arrays into separate JSON lines, one for 49 each top-level item. Non-arrays result in a single JSON-line. 50 51 When not given a filepath to load, standard input is used instead. Every 52 output line is always a single top-level item from the input. 53 ` 54 55 // errNoMoreOutput is a generic dummy output-error, which is meant to be 56 // ultimately ignored, being just an excuse to quit the app immediately 57 // and successfully 58 var errNoMoreOutput = errors.New(`no more output`) 59 60 func main() { 61 if len(os.Args) > 1 { 62 switch os.Args[1] { 63 case `-h`, `--h`, `-help`, `--help`: 64 os.Stdout.WriteString(info[1:]) 65 return 66 } 67 } 68 69 if len(os.Args) > 2 { 70 const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n" 71 os.Stderr.WriteString(msg) 72 os.Exit(1) 73 } 74 75 // figure out whether input should come from a named file or from stdin 76 path := `-` 77 if len(os.Args) > 1 { 78 path = os.Args[1] 79 } 80 81 err := handleInput(os.Stdout, path) 82 if err != nil && err != io.EOF && err != errNoMoreOutput { 83 os.Stderr.WriteString(err.Error()) 84 os.Stderr.WriteString("\n") 85 os.Exit(1) 86 } 87 } 88 89 // handleInput simplifies control-flow for func main 90 func handleInput(w io.Writer, path string) error { 91 if path == `-` { 92 return convert(w, os.Stdin) 93 } 94 95 // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) { 96 // resp, err := http.Get(path) 97 // if err != nil { 98 // return err 99 // } 100 // defer resp.Body.Close() 101 // return convert(w, resp.Body) 102 // } 103 104 f, err := os.Open(path) 105 if err != nil { 106 // on windows, file-not-found error messages may mention `CreateFile`, 107 // even when trying to open files in read-only mode 108 return errors.New(`can't open file named ` + path) 109 } 110 defer f.Close() 111 return convert(w, f) 112 } 113 114 // convert simplifies control-flow for func handleInput 115 func convert(w io.Writer, r io.Reader) error { 116 bw := bufio.NewWriter(w) 117 defer bw.Flush() 118 return jsonl(bw, r) 119 } 120 121 // escapedStringBytes helps func handleString treat all string bytes quickly 122 // and correctly, using their officially-supported JSON escape sequences 123 // 124 // https://www.rfc-editor.org/rfc/rfc8259#section-7 125 var escapedStringBytes = [256][]byte{ 126 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 127 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 128 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 129 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 130 {'\\', 'b'}, {'\\', 't'}, 131 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 132 {'\\', 'f'}, {'\\', 'r'}, 133 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 134 {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 135 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 136 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 137 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 138 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 139 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 140 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 141 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 142 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 143 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 144 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 145 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 146 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 147 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 148 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 149 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 150 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 151 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 152 {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119}, 153 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 154 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 155 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 156 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 157 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 158 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 159 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 160 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 161 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 162 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 163 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 164 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 165 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 166 {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231}, 167 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 168 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 169 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 170 } 171 172 // jsonl does it all, given a reader and a writer 173 func jsonl(w *bufio.Writer, r io.Reader) error { 174 dec := json.NewDecoder(r) 175 // avoid parsing numbers, so unusually-long numbers are kept verbatim, 176 // even if JSON parsers aren't required to guarantee such input-fidelity 177 // for numbers 178 dec.UseNumber() 179 180 t, err := dec.Token() 181 if err == io.EOF { 182 // return errors.New(`input has no JSON values`) 183 return nil 184 } 185 186 if t == json.Delim('[') { 187 if err := handleTopLevelArray(w, dec); err != nil { 188 return err 189 } 190 } else { 191 if err := handleToken(w, dec, t); err != nil { 192 return err 193 } 194 w.WriteByte('\n') 195 } 196 197 _, err = dec.Token() 198 if err == io.EOF { 199 // input is over, so it's a success 200 return nil 201 } 202 203 if err == nil { 204 // a successful `read` is a failure, as it means there are 205 // trailing JSON tokens 206 return errors.New(`unexpected trailing data`) 207 } 208 209 // any other error, perhaps some invalid-JSON-syntax-type error 210 return err 211 } 212 213 // handleToken handles recursion for func json2 214 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error { 215 switch t := t.(type) { 216 case json.Delim: 217 switch t { 218 case json.Delim('['): 219 return handleArray(w, dec) 220 case json.Delim('{'): 221 return handleObject(w, dec) 222 default: 223 return errors.New(`unsupported JSON syntax ` + string(t)) 224 } 225 226 case nil: 227 w.WriteString(`null`) 228 return nil 229 230 case bool: 231 if t { 232 w.WriteString(`true`) 233 } else { 234 w.WriteString(`false`) 235 } 236 return nil 237 238 case json.Number: 239 w.WriteString(t.String()) 240 return nil 241 242 case string: 243 return handleString(w, t) 244 245 default: 246 // return fmt.Errorf(`unsupported token type %T`, t) 247 return errors.New(`invalid JSON token`) 248 } 249 } 250 251 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error { 252 for i := 0; true; i++ { 253 t, err := dec.Token() 254 if err == io.EOF { 255 return nil 256 } 257 258 if err != nil { 259 return err 260 } 261 262 if t == json.Delim(']') { 263 return nil 264 } 265 266 err = handleToken(w, dec, t) 267 if err != nil { 268 return err 269 } 270 271 w.WriteByte('\n') 272 if err := w.Flush(); err != nil { 273 // a write error may be the consequence of stdout being closed, 274 // perhaps by another app along a pipe 275 return errNoMoreOutput 276 } 277 } 278 279 // make the compiler happy 280 return nil 281 } 282 283 // handleArray handles arrays for func handleToken 284 func handleArray(w *bufio.Writer, dec *json.Decoder) error { 285 w.WriteByte('[') 286 287 for i := 0; true; i++ { 288 t, err := dec.Token() 289 if err == io.EOF { 290 w.WriteByte(']') 291 return nil 292 } 293 294 if err != nil { 295 return err 296 } 297 298 if t == json.Delim(']') { 299 w.WriteByte(']') 300 return nil 301 } 302 303 if i > 0 { 304 _, err := w.WriteString(", ") 305 if err != nil { 306 return errNoMoreOutput 307 } 308 } 309 310 err = handleToken(w, dec, t) 311 if err != nil { 312 return err 313 } 314 } 315 316 // make the compiler happy 317 return nil 318 } 319 320 // handleObject handles objects for func handleToken 321 func handleObject(w *bufio.Writer, dec *json.Decoder) error { 322 w.WriteByte('{') 323 324 for i := 0; true; i++ { 325 t, err := dec.Token() 326 if err == io.EOF { 327 w.WriteByte('}') 328 return nil 329 } 330 331 if err != nil { 332 return err 333 } 334 335 if t == json.Delim('}') { 336 w.WriteByte('}') 337 return nil 338 } 339 340 if i > 0 { 341 _, err := w.WriteString(", ") 342 if err != nil { 343 return errNoMoreOutput 344 } 345 } 346 347 k, ok := t.(string) 348 if !ok { 349 return errors.New(`expected a string for a key-value pair`) 350 } 351 352 err = handleString(w, k) 353 if err != nil { 354 return err 355 } 356 357 w.WriteString(": ") 358 359 t, err = dec.Token() 360 if err == io.EOF { 361 return errors.New(`expected a value for a key-value pair`) 362 } 363 364 err = handleToken(w, dec, t) 365 if err != nil { 366 return err 367 } 368 } 369 370 // make the compiler happy 371 return nil 372 } 373 374 // handleString handles strings for func handleToken, and keys for func 375 // handleObject 376 func handleString(w *bufio.Writer, s string) error { 377 w.WriteByte('"') 378 for i := range s { 379 w.Write(escapedStringBytes[s[i]]) 380 } 381 w.WriteByte('"') 382 return nil 383 }