File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath jsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40 )
  41 
// info is the help message shown by the help options; func main prints it
// without its leading newline, by slicing it as info[1:]
const info = `
jsonl [options...] [filepaths...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`
  51 
  52 func main() {
  53     args := os.Args[1:]
  54     buffered := false
  55 
  56     for len(args) > 0 {
  57         switch args[0] {
  58         case `-b`, `--b`, `-buffered`, `--buffered`:
  59             buffered = true
  60             args = args[1:]
  61             continue
  62 
  63         case `-h`, `--h`, `-help`, `--help`:
  64             os.Stdout.WriteString(info[1:])
  65             return
  66         }
  67 
  68         break
  69     }
  70 
  71     if len(args) > 0 && args[0] == `--` {
  72         args = args[1:]
  73     }
  74 
  75     liveLines := !buffered
  76     if !buffered {
  77         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
  78             liveLines = false
  79         }
  80     }
  81 
  82     if err := run(os.Stdout, args, liveLines); err != nil && err != io.EOF {
  83         os.Stderr.WriteString(err.Error())
  84         os.Stderr.WriteString("\n")
  85         os.Exit(1)
  86     }
  87 }
  88 
  89 func run(w io.Writer, args []string, liveLines bool) error {
  90     dashes := 0
  91     for _, path := range args {
  92         if path == `-` {
  93             dashes++
  94         }
  95         if dashes > 1 {
  96             return errors.New(`can't use stdin (dash) more than once`)
  97         }
  98     }
  99 
 100     bw := bufio.NewWriter(w)
 101     defer bw.Flush()
 102 
 103     if len(args) == 0 {
 104         return handleInput(bw, `-`, liveLines)
 105     }
 106 
 107     for _, path := range args {
 108         if err := handleInput(bw, path, liveLines); err != nil {
 109             return err
 110         }
 111     }
 112     return nil
 113 }
 114 
 115 // handleInput simplifies control-flow for func main
 116 func handleInput(w *bufio.Writer, path string, liveLines bool) error {
 117     if path == `-` {
 118         return jsonl(w, os.Stdin, liveLines)
 119     }
 120 
 121     f, err := os.Open(path)
 122     if err != nil {
 123         // on windows, file-not-found error messages may mention `CreateFile`,
 124         // even when trying to open files in read-only mode
 125         return errors.New(`can't open file named ` + path)
 126     }
 127     defer f.Close()
 128     return jsonl(w, f, liveLines)
 129 }
 130 
 131 // escapedStringBytes helps func handleString treat all string bytes quickly
 132 // and correctly, using their officially-supported JSON escape sequences
 133 //
 134 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 135 var escapedStringBytes = [256][]byte{
 136     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 137     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 138     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 139     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 140     {'\\', 'b'}, {'\\', 't'},
 141     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 142     {'\\', 'f'}, {'\\', 'r'},
 143     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 144     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 145     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 146     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 147     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 148     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 149     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 150     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 151     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 152     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 153     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 154     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 155     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 156     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 157     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 158     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 159     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 160     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 161     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 162     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 163     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 164     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 165     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 166     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 167     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 168     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 169     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 170     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 171     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 172     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 173     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 174     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 175     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 176     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 177     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 178     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 179     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 180 }
 181 
 182 // jsonl does it all, given a reader and a writer
 183 func jsonl(w *bufio.Writer, r io.Reader, live bool) error {
 184     dec := json.NewDecoder(r)
 185     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 186     // even if JSON parsers aren't required to guarantee such input-fidelity
 187     // for numbers
 188     dec.UseNumber()
 189 
 190     t, err := dec.Token()
 191     if err == io.EOF {
 192         // return errors.New(`input has no JSON values`)
 193         return nil
 194     }
 195 
 196     if t == json.Delim('[') {
 197         if err := handleTopLevelArray(w, dec, live); err != nil {
 198             return err
 199         }
 200     } else {
 201         if err := handleToken(w, dec, t); err != nil {
 202             return err
 203         }
 204         w.WriteByte('\n')
 205     }
 206 
 207     _, err = dec.Token()
 208     if err == io.EOF {
 209         // input is over, so it's a success
 210         return nil
 211     }
 212 
 213     if err == nil {
 214         // a successful `read` is a failure, as it means there are
 215         // trailing JSON tokens
 216         return errors.New(`unexpected trailing data`)
 217     }
 218 
 219     // any other error, perhaps some invalid-JSON-syntax-type error
 220     return err
 221 }
 222 
 223 // handleToken handles recursion for func json2
 224 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 225     switch t := t.(type) {
 226     case json.Delim:
 227         switch t {
 228         case json.Delim('['):
 229             return handleArray(w, dec)
 230         case json.Delim('{'):
 231             return handleObject(w, dec)
 232         default:
 233             return errors.New(`unsupported JSON syntax ` + string(t))
 234         }
 235 
 236     case nil:
 237         w.WriteString(`null`)
 238         return nil
 239 
 240     case bool:
 241         if t {
 242             w.WriteString(`true`)
 243         } else {
 244             w.WriteString(`false`)
 245         }
 246         return nil
 247 
 248     case json.Number:
 249         w.WriteString(t.String())
 250         return nil
 251 
 252     case string:
 253         return handleString(w, t)
 254 
 255     default:
 256         // return fmt.Errorf(`unsupported token type %T`, t)
 257         return errors.New(`invalid JSON token`)
 258     }
 259 }
 260 
 261 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error {
 262     for i := 0; true; i++ {
 263         t, err := dec.Token()
 264         if err == io.EOF {
 265             return nil
 266         }
 267 
 268         if err != nil {
 269             return err
 270         }
 271 
 272         if t == json.Delim(']') {
 273             return nil
 274         }
 275 
 276         err = handleToken(w, dec, t)
 277         if err != nil {
 278             return err
 279         }
 280 
 281         if w.WriteByte('\n') != nil {
 282             return io.EOF
 283         }
 284 
 285         if !live {
 286             continue
 287         }
 288 
 289         if w.Flush() != nil {
 290             return io.EOF
 291         }
 292     }
 293 
 294     // make the compiler happy
 295     return nil
 296 }
 297 
 298 // handleArray handles arrays for func handleToken
 299 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 300     w.WriteByte('[')
 301 
 302     for i := 0; true; i++ {
 303         t, err := dec.Token()
 304         if err == io.EOF {
 305             return errors.New(`end of JSON before array was closed`)
 306         }
 307         if err != nil {
 308             return err
 309         }
 310 
 311         if t == json.Delim(']') {
 312             w.WriteByte(']')
 313             return nil
 314         }
 315 
 316         if i > 0 {
 317             _, err := w.WriteString(", ")
 318             if err != nil {
 319                 return io.EOF
 320             }
 321         }
 322 
 323         err = handleToken(w, dec, t)
 324         if err != nil {
 325             return err
 326         }
 327     }
 328 
 329     // make the compiler happy
 330     return nil
 331 }
 332 
 333 // handleObject handles objects for func handleToken
 334 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 335     w.WriteByte('{')
 336 
 337     for i := 0; true; i++ {
 338         t, err := dec.Token()
 339         if err == io.EOF {
 340             return errors.New(`end of JSON before object was closed`)
 341         }
 342         if err != nil {
 343             return err
 344         }
 345 
 346         if t == json.Delim('}') {
 347             w.WriteByte('}')
 348             return nil
 349         }
 350 
 351         if i > 0 {
 352             _, err := w.WriteString(", ")
 353             if err != nil {
 354                 return io.EOF
 355             }
 356         }
 357 
 358         k, ok := t.(string)
 359         if !ok {
 360             return errors.New(`expected a string for a key-value pair`)
 361         }
 362 
 363         err = handleString(w, k)
 364         if err != nil {
 365             return err
 366         }
 367 
 368         w.WriteString(": ")
 369 
 370         t, err = dec.Token()
 371         if err == io.EOF {
 372             return errors.New(`expected a value for a key-value pair`)
 373         }
 374 
 375         err = handleToken(w, dec, t)
 376         if err != nil {
 377             return err
 378         }
 379     }
 380 
 381     // make the compiler happy
 382     return nil
 383 }
 384 
 385 // handleString handles strings for func handleToken, and keys for func
 386 // handleObject
 387 func handleString(w *bufio.Writer, s string) error {
 388     w.WriteByte('"')
 389     for i := range s {
 390         w.Write(escapedStringBytes[s[i]])
 391     }
 392     w.WriteByte('"')
 393     return nil
 394 }