File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath jsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40 )
  41 
// info is the help message func main shows for the help options; it starts
// with a line-feed to keep the literal tidy, which func main skips by
// writing info[1:]
const info = `
jsonl [options...] [filepaths...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`
  51 
  52 func main() {
  53     args := os.Args[1:]
  54     buffered := false
  55 
  56     for len(args) > 0 {
  57         switch args[0] {
  58         case `-b`, `--b`, `-buffered`, `--buffered`:
  59             buffered = true
  60             args = args[1:]
  61             continue
  62 
  63         case `-h`, `--h`, `-help`, `--help`:
  64             os.Stdout.WriteString(info[1:])
  65             return
  66         }
  67 
  68         break
  69     }
  70 
  71     if len(args) > 0 && args[0] == `--` {
  72         args = args[1:]
  73     }
  74 
  75     liveLines := !buffered
  76     if !buffered {
  77         if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil {
  78             liveLines = false
  79         }
  80     }
  81 
  82     if err := run(os.Stdout, args, liveLines); err != nil && err != io.EOF {
  83         os.Stderr.WriteString(err.Error())
  84         os.Stderr.WriteString("\n")
  85         os.Exit(1)
  86         return
  87     }
  88 }
  89 
  90 func run(w io.Writer, args []string, liveLines bool) error {
  91     dashes := 0
  92     for _, path := range args {
  93         if path == `-` {
  94             dashes++
  95         }
  96         if dashes > 1 {
  97             return errors.New(`can't use stdin (dash) more than once`)
  98         }
  99     }
 100 
 101     bw := bufio.NewWriter(w)
 102     defer bw.Flush()
 103 
 104     if len(args) == 0 {
 105         return handleInput(bw, `-`, liveLines)
 106     }
 107 
 108     for _, path := range args {
 109         if err := handleInput(bw, path, liveLines); err != nil {
 110             return err
 111         }
 112     }
 113     return nil
 114 }
 115 
 116 // handleInput simplifies control-flow for func main
 117 func handleInput(w *bufio.Writer, path string, liveLines bool) error {
 118     if path == `-` {
 119         return jsonl(w, os.Stdin, liveLines)
 120     }
 121 
 122     f, err := os.Open(path)
 123     if err != nil {
 124         // on windows, file-not-found error messages may mention `CreateFile`,
 125         // even when trying to open files in read-only mode
 126         return errors.New(`can't open file named ` + path)
 127     }
 128     defer f.Close()
 129     return jsonl(w, f, liveLines)
 130 }
 131 
 132 // escapedStringBytes helps func handleString treat all string bytes quickly
 133 // and correctly, using their officially-supported JSON escape sequences
 134 //
 135 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 136 var escapedStringBytes = [256][]byte{
 137     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 138     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 139     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 140     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 141     {'\\', 'b'}, {'\\', 't'},
 142     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 143     {'\\', 'f'}, {'\\', 'r'},
 144     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 145     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 146     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 147     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 148     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 149     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 150     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 151     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 152     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 153     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 154     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 155     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 156     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 157     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 158     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 159     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 160     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 161     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 162     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 163     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 164     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 165     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 166     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 167     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 168     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 169     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 170     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 171     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 172     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 173     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 174     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 175     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 176     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 177     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 178     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 179     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 180     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 181 }
 182 
 183 // jsonl does it all, given a reader and a writer
 184 func jsonl(w *bufio.Writer, r io.Reader, live bool) error {
 185     dec := json.NewDecoder(r)
 186     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 187     // even if JSON parsers aren't required to guarantee such input-fidelity
 188     // for numbers
 189     dec.UseNumber()
 190 
 191     t, err := dec.Token()
 192     if err == io.EOF {
 193         // return errors.New(`input has no JSON values`)
 194         return nil
 195     }
 196 
 197     if t == json.Delim('[') {
 198         if err := handleTopLevelArray(w, dec, live); err != nil {
 199             return err
 200         }
 201     } else {
 202         if err := handleToken(w, dec, t); err != nil {
 203             return err
 204         }
 205         w.WriteByte('\n')
 206     }
 207 
 208     _, err = dec.Token()
 209     if err == io.EOF {
 210         // input is over, so it's a success
 211         return nil
 212     }
 213 
 214     if err == nil {
 215         // a successful `read` is a failure, as it means there are
 216         // trailing JSON tokens
 217         return errors.New(`unexpected trailing data`)
 218     }
 219 
 220     // any other error, perhaps some invalid-JSON-syntax-type error
 221     return err
 222 }
 223 
 224 // handleToken handles recursion for func json2
 225 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 226     switch t := t.(type) {
 227     case json.Delim:
 228         switch t {
 229         case json.Delim('['):
 230             return handleArray(w, dec)
 231         case json.Delim('{'):
 232             return handleObject(w, dec)
 233         default:
 234             return errors.New(`unsupported JSON syntax ` + string(t))
 235         }
 236 
 237     case nil:
 238         w.WriteString(`null`)
 239         return nil
 240 
 241     case bool:
 242         if t {
 243             w.WriteString(`true`)
 244         } else {
 245             w.WriteString(`false`)
 246         }
 247         return nil
 248 
 249     case json.Number:
 250         w.WriteString(t.String())
 251         return nil
 252 
 253     case string:
 254         return handleString(w, t)
 255 
 256     default:
 257         // return fmt.Errorf(`unsupported token type %T`, t)
 258         return errors.New(`invalid JSON token`)
 259     }
 260 }
 261 
 262 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder, live bool) error {
 263     for i := 0; true; i++ {
 264         t, err := dec.Token()
 265         if err == io.EOF {
 266             return nil
 267         }
 268 
 269         if err != nil {
 270             return err
 271         }
 272 
 273         if t == json.Delim(']') {
 274             return nil
 275         }
 276 
 277         err = handleToken(w, dec, t)
 278         if err != nil {
 279             return err
 280         }
 281 
 282         if w.WriteByte('\n') != nil {
 283             return io.EOF
 284         }
 285 
 286         if !live {
 287             continue
 288         }
 289 
 290         if w.Flush() != nil {
 291             return io.EOF
 292         }
 293     }
 294 
 295     // make the compiler happy
 296     return nil
 297 }
 298 
 299 // handleArray handles arrays for func handleToken
 300 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 301     w.WriteByte('[')
 302 
 303     for i := 0; true; i++ {
 304         t, err := dec.Token()
 305         if err == io.EOF {
 306             return errors.New(`end of JSON before array was closed`)
 307         }
 308         if err != nil {
 309             return err
 310         }
 311 
 312         if t == json.Delim(']') {
 313             w.WriteByte(']')
 314             return nil
 315         }
 316 
 317         if i > 0 {
 318             _, err := w.WriteString(", ")
 319             if err != nil {
 320                 return io.EOF
 321             }
 322         }
 323 
 324         err = handleToken(w, dec, t)
 325         if err != nil {
 326             return err
 327         }
 328     }
 329 
 330     // make the compiler happy
 331     return nil
 332 }
 333 
 334 // handleObject handles objects for func handleToken
 335 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 336     w.WriteByte('{')
 337 
 338     for i := 0; true; i++ {
 339         t, err := dec.Token()
 340         if err == io.EOF {
 341             return errors.New(`end of JSON before object was closed`)
 342         }
 343         if err != nil {
 344             return err
 345         }
 346 
 347         if t == json.Delim('}') {
 348             w.WriteByte('}')
 349             return nil
 350         }
 351 
 352         if i > 0 {
 353             _, err := w.WriteString(", ")
 354             if err != nil {
 355                 return io.EOF
 356             }
 357         }
 358 
 359         k, ok := t.(string)
 360         if !ok {
 361             return errors.New(`expected a string for a key-value pair`)
 362         }
 363 
 364         err = handleString(w, k)
 365         if err != nil {
 366             return err
 367         }
 368 
 369         w.WriteString(": ")
 370 
 371         t, err = dec.Token()
 372         if err == io.EOF {
 373             return errors.New(`expected a value for a key-value pair`)
 374         }
 375 
 376         err = handleToken(w, dec, t)
 377         if err != nil {
 378             return err
 379         }
 380     }
 381 
 382     // make the compiler happy
 383     return nil
 384 }
 385 
 386 // handleString handles strings for func handleToken, and keys for func
 387 // handleObject
 388 func handleString(w *bufio.Writer, s string) error {
 389     w.WriteByte('"')
 390     for i := range s {
 391         w.Write(escapedStringBytes[s[i]])
 392     }
 393     w.WriteByte('"')
 394     return nil
 395 }