File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright (c) 2026 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the "Software"), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath jsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40 )
  41 
  42 // Note: the code is avoiding using the fmt package to save hundreds of
  43 // kilobytes on the resulting executable, which is a noticeable difference.
  44 
// info is the help message func main shows when the first command-line
// argument is one of the help options; the text starts with a line-feed,
// which func main skips by slicing off the first byte before showing it
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`
  54 
// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully: the array-handling funcs return it when writing/flushing
// output fails, and func main deliberately avoids reporting it as a failure.
var errNoMoreOutput = errors.New(`no more output`)
  59 
  60 func main() {
  61     if len(os.Args) > 1 {
  62         switch os.Args[1] {
  63         case `-h`, `--h`, `-help`, `--help`:
  64             os.Stdout.WriteString(info[1:])
  65             return
  66         }
  67     }
  68 
  69     if len(os.Args) > 2 {
  70         os.Stderr.WriteString("multiple inputs not allowed\n")
  71         os.Exit(1)
  72     }
  73 
  74     // figure out whether input should come from a named file or from stdin
  75     path := `-`
  76     if len(os.Args) > 1 {
  77         path = os.Args[1]
  78     }
  79 
  80     err := handleInput(os.Stdout, path)
  81     if err != nil && err != io.EOF && err != errNoMoreOutput {
  82         os.Stderr.WriteString(err.Error())
  83         os.Stderr.WriteString("\n")
  84         os.Exit(1)
  85     }
  86 }
  87 
  88 // handleInput simplifies control-flow for func main
  89 func handleInput(w io.Writer, path string) error {
  90     if path == `-` {
  91         return convert(w, os.Stdin)
  92     }
  93 
  94     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
  95     //  resp, err := http.Get(path)
  96     //  if err != nil {
  97     //      return err
  98     //  }
  99     //  defer resp.Body.Close()
 100     //  return convert(w, resp.Body)
 101     // }
 102 
 103     f, err := os.Open(path)
 104     if err != nil {
 105         // on windows, file-not-found error messages may mention `CreateFile`,
 106         // even when trying to open files in read-only mode
 107         return errors.New(`can't open file named ` + path)
 108     }
 109     defer f.Close()
 110     return convert(w, f)
 111 }
 112 
 113 // convert simplifies control-flow for func handleInput
 114 func convert(w io.Writer, r io.Reader) error {
 115     bw := bufio.NewWriter(w)
 116     defer bw.Flush()
 117     return jsonl(bw, r)
 118 }
 119 
 120 // escapedStringBytes helps func handleString treat all string bytes quickly
 121 // and correctly, using their officially-supported JSON escape sequences
 122 //
 123 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 124 var escapedStringBytes = [256][]byte{
 125     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 126     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 127     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 128     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 129     {'\\', 'b'}, {'\\', 't'},
 130     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 131     {'\\', 'f'}, {'\\', 'r'},
 132     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 133     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 134     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 135     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 136     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 137     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 138     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 139     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 140     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 141     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 142     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 143     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 144     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 145     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 146     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 147     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 148     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 149     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 150     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 151     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 152     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 153     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 154     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 155     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 156     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 157     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 158     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 159     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 160     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 161     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 162     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 163     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 164     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 165     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 166     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 167     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 168     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 169 }
 170 
 171 // jsonl does it all, given a reader and a writer
 172 func jsonl(w *bufio.Writer, r io.Reader) error {
 173     dec := json.NewDecoder(r)
 174     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 175     // even if JSON parsers aren't required to guarantee such input-fidelity
 176     // for numbers
 177     dec.UseNumber()
 178 
 179     t, err := dec.Token()
 180     if err == io.EOF {
 181         // return errors.New(`input has no JSON values`)
 182         return nil
 183     }
 184 
 185     if t == json.Delim('[') {
 186         if err := handleTopLevelArray(w, dec); err != nil {
 187             return err
 188         }
 189     } else {
 190         if err := handleToken(w, dec, t); err != nil {
 191             return err
 192         }
 193         w.WriteByte('\n')
 194     }
 195 
 196     _, err = dec.Token()
 197     if err == io.EOF {
 198         // input is over, so it's a success
 199         return nil
 200     }
 201 
 202     if err == nil {
 203         // a successful `read` is a failure, as it means there are
 204         // trailing JSON tokens
 205         return errors.New(`unexpected trailing data`)
 206     }
 207 
 208     // any other error, perhaps some invalid-JSON-syntax-type error
 209     return err
 210 }
 211 
 212 // handleToken handles recursion for func json2
 213 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 214     switch t := t.(type) {
 215     case json.Delim:
 216         switch t {
 217         case json.Delim('['):
 218             return handleArray(w, dec)
 219         case json.Delim('{'):
 220             return handleObject(w, dec)
 221         default:
 222             return errors.New(`unsupported JSON syntax ` + string(t))
 223         }
 224 
 225     case nil:
 226         w.WriteString(`null`)
 227         return nil
 228 
 229     case bool:
 230         if t {
 231             w.WriteString(`true`)
 232         } else {
 233             w.WriteString(`false`)
 234         }
 235         return nil
 236 
 237     case json.Number:
 238         w.WriteString(t.String())
 239         return nil
 240 
 241     case string:
 242         return handleString(w, t)
 243 
 244     default:
 245         // return fmt.Errorf(`unsupported token type %T`, t)
 246         return errors.New(`invalid JSON token`)
 247     }
 248 }
 249 
 250 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
 251     for i := 0; true; i++ {
 252         t, err := dec.Token()
 253         if err == io.EOF {
 254             return nil
 255         }
 256 
 257         if err != nil {
 258             return err
 259         }
 260 
 261         if t == json.Delim(']') {
 262             return nil
 263         }
 264 
 265         err = handleToken(w, dec, t)
 266         if err != nil {
 267             return err
 268         }
 269 
 270         w.WriteByte('\n')
 271         if err := w.Flush(); err != nil {
 272             // a write error may be the consequence of stdout being closed,
 273             // perhaps by another app along a pipe
 274             return errNoMoreOutput
 275         }
 276     }
 277 
 278     // make the compiler happy
 279     return nil
 280 }
 281 
 282 // handleArray handles arrays for func handleToken
 283 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 284     w.WriteByte('[')
 285 
 286     for i := 0; true; i++ {
 287         t, err := dec.Token()
 288         if err == io.EOF {
 289             w.WriteByte(']')
 290             return nil
 291         }
 292 
 293         if err != nil {
 294             return err
 295         }
 296 
 297         if t == json.Delim(']') {
 298             w.WriteByte(']')
 299             return nil
 300         }
 301 
 302         if i > 0 {
 303             _, err := w.WriteString(", ")
 304             if err != nil {
 305                 return errNoMoreOutput
 306             }
 307         }
 308 
 309         err = handleToken(w, dec, t)
 310         if err != nil {
 311             return err
 312         }
 313     }
 314 
 315     // make the compiler happy
 316     return nil
 317 }
 318 
 319 // handleObject handles objects for func handleToken
 320 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 321     w.WriteByte('{')
 322 
 323     for i := 0; true; i++ {
 324         t, err := dec.Token()
 325         if err == io.EOF {
 326             w.WriteByte('}')
 327             return nil
 328         }
 329 
 330         if err != nil {
 331             return err
 332         }
 333 
 334         if t == json.Delim('}') {
 335             w.WriteByte('}')
 336             return nil
 337         }
 338 
 339         if i > 0 {
 340             _, err := w.WriteString(", ")
 341             if err != nil {
 342                 return errNoMoreOutput
 343             }
 344         }
 345 
 346         k, ok := t.(string)
 347         if !ok {
 348             return errors.New(`expected a string for a key-value pair`)
 349         }
 350 
 351         err = handleString(w, k)
 352         if err != nil {
 353             return err
 354         }
 355 
 356         w.WriteString(": ")
 357 
 358         t, err = dec.Token()
 359         if err == io.EOF {
 360             return errors.New(`expected a value for a key-value pair`)
 361         }
 362 
 363         err = handleToken(w, dec, t)
 364         if err != nil {
 365             return err
 366         }
 367     }
 368 
 369     // make the compiler happy
 370     return nil
 371 }
 372 
 373 // handleString handles strings for func handleToken, and keys for func
 374 // handleObject
 375 func handleString(w *bufio.Writer, s string) error {
 376     w.WriteByte('"')
 377     for i := range s {
 378         w.Write(escapedStringBytes[s[i]])
 379     }
 380     w.WriteByte('"')
 381     return nil
 382 }