File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 To compile a smaller-sized command-line app, you can use the `go` command as
  27 follows:
  28 
  29 go build -ldflags "-s -w" -trimpath jsonl.go
  30 */
  31 
  32 package main
  33 
  34 import (
  35     "bufio"
  36     "encoding/json"
  37     "errors"
  38     "io"
  39     "os"
  40 )
  41 
// Note: this code deliberately avoids the fmt package, which saves hundreds
// of kilobytes in the resulting executable: a noticeable difference.
  44 
// info is the help message func main shows when the first argument is one
// of the help options; the string starts with a line-feed, which func main
// skips by writing info[1:]
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`
  54 
// errNoMoreOutput is a generic dummy output-error: the array/object handlers
// return it when writing to (or flushing) the output fails, and func main
// ignores it, so the app quits immediately and successfully in that case
var errNoMoreOutput = errors.New(`no more output`)
  59 
  60 func main() {
  61     if len(os.Args) > 1 {
  62         switch os.Args[1] {
  63         case `-h`, `--h`, `-help`, `--help`:
  64             os.Stdout.WriteString(info[1:])
  65             return
  66         }
  67     }
  68 
  69     if len(os.Args) > 2 {
  70         const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
  71         os.Stderr.WriteString(msg)
  72         os.Exit(1)
  73     }
  74 
  75     // figure out whether input should come from a named file or from stdin
  76     path := `-`
  77     if len(os.Args) > 1 {
  78         path = os.Args[1]
  79     }
  80 
  81     err := handleInput(os.Stdout, path)
  82     if err != nil && err != io.EOF && err != errNoMoreOutput {
  83         os.Stderr.WriteString(err.Error())
  84         os.Stderr.WriteString("\n")
  85         os.Exit(1)
  86     }
  87 }
  88 
  89 // handleInput simplifies control-flow for func main
  90 func handleInput(w io.Writer, path string) error {
  91     if path == `-` {
  92         return convert(w, os.Stdin)
  93     }
  94 
  95     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
  96     //  resp, err := http.Get(path)
  97     //  if err != nil {
  98     //      return err
  99     //  }
 100     //  defer resp.Body.Close()
 101     //  return convert(w, resp.Body)
 102     // }
 103 
 104     f, err := os.Open(path)
 105     if err != nil {
 106         // on windows, file-not-found error messages may mention `CreateFile`,
 107         // even when trying to open files in read-only mode
 108         return errors.New(`can't open file named ` + path)
 109     }
 110     defer f.Close()
 111     return convert(w, f)
 112 }
 113 
 114 // convert simplifies control-flow for func handleInput
 115 func convert(w io.Writer, r io.Reader) error {
 116     bw := bufio.NewWriter(w)
 117     defer bw.Flush()
 118     return jsonl(bw, r)
 119 }
 120 
 121 // escapedStringBytes helps func handleString treat all string bytes quickly
 122 // and correctly, using their officially-supported JSON escape sequences
 123 //
 124 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 125 var escapedStringBytes = [256][]byte{
 126     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 127     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 128     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 129     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 130     {'\\', 'b'}, {'\\', 't'},
 131     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 132     {'\\', 'f'}, {'\\', 'r'},
 133     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 134     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 135     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 136     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 137     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 138     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 139     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 140     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 141     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 142     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 143     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 144     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 145     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 146     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 147     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 148     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 149     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 150     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 151     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 152     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 153     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 154     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 155     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 156     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 157     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 158     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 159     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 160     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 161     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 162     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 163     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 164     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 165     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 166     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 167     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 168     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 169     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 170 }
 171 
 172 // jsonl does it all, given a reader and a writer
 173 func jsonl(w *bufio.Writer, r io.Reader) error {
 174     dec := json.NewDecoder(r)
 175     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 176     // even if JSON parsers aren't required to guarantee such input-fidelity
 177     // for numbers
 178     dec.UseNumber()
 179 
 180     t, err := dec.Token()
 181     if err == io.EOF {
 182         // return errors.New(`input has no JSON values`)
 183         return nil
 184     }
 185 
 186     if t == json.Delim('[') {
 187         if err := handleTopLevelArray(w, dec); err != nil {
 188             return err
 189         }
 190     } else {
 191         if err := handleToken(w, dec, t); err != nil {
 192             return err
 193         }
 194         w.WriteByte('\n')
 195     }
 196 
 197     _, err = dec.Token()
 198     if err == io.EOF {
 199         // input is over, so it's a success
 200         return nil
 201     }
 202 
 203     if err == nil {
 204         // a successful `read` is a failure, as it means there are
 205         // trailing JSON tokens
 206         return errors.New(`unexpected trailing data`)
 207     }
 208 
 209     // any other error, perhaps some invalid-JSON-syntax-type error
 210     return err
 211 }
 212 
 213 // handleToken handles recursion for func json2
 214 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 215     switch t := t.(type) {
 216     case json.Delim:
 217         switch t {
 218         case json.Delim('['):
 219             return handleArray(w, dec)
 220         case json.Delim('{'):
 221             return handleObject(w, dec)
 222         default:
 223             return errors.New(`unsupported JSON syntax ` + string(t))
 224         }
 225 
 226     case nil:
 227         w.WriteString(`null`)
 228         return nil
 229 
 230     case bool:
 231         if t {
 232             w.WriteString(`true`)
 233         } else {
 234             w.WriteString(`false`)
 235         }
 236         return nil
 237 
 238     case json.Number:
 239         w.WriteString(t.String())
 240         return nil
 241 
 242     case string:
 243         return handleString(w, t)
 244 
 245     default:
 246         // return fmt.Errorf(`unsupported token type %T`, t)
 247         return errors.New(`invalid JSON token`)
 248     }
 249 }
 250 
 251 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
 252     for i := 0; true; i++ {
 253         t, err := dec.Token()
 254         if err == io.EOF {
 255             return nil
 256         }
 257 
 258         if err != nil {
 259             return err
 260         }
 261 
 262         if t == json.Delim(']') {
 263             return nil
 264         }
 265 
 266         err = handleToken(w, dec, t)
 267         if err != nil {
 268             return err
 269         }
 270 
 271         w.WriteByte('\n')
 272         if err := w.Flush(); err != nil {
 273             // a write error may be the consequence of stdout being closed,
 274             // perhaps by another app along a pipe
 275             return errNoMoreOutput
 276         }
 277     }
 278 
 279     // make the compiler happy
 280     return nil
 281 }
 282 
 283 // handleArray handles arrays for func handleToken
 284 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 285     w.WriteByte('[')
 286 
 287     for i := 0; true; i++ {
 288         t, err := dec.Token()
 289         if err == io.EOF {
 290             w.WriteByte(']')
 291             return nil
 292         }
 293 
 294         if err != nil {
 295             return err
 296         }
 297 
 298         if t == json.Delim(']') {
 299             w.WriteByte(']')
 300             return nil
 301         }
 302 
 303         if i > 0 {
 304             _, err := w.WriteString(", ")
 305             if err != nil {
 306                 return errNoMoreOutput
 307             }
 308         }
 309 
 310         err = handleToken(w, dec, t)
 311         if err != nil {
 312             return err
 313         }
 314     }
 315 
 316     // make the compiler happy
 317     return nil
 318 }
 319 
 320 // handleObject handles objects for func handleToken
 321 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 322     w.WriteByte('{')
 323 
 324     for i := 0; true; i++ {
 325         t, err := dec.Token()
 326         if err == io.EOF {
 327             w.WriteByte('}')
 328             return nil
 329         }
 330 
 331         if err != nil {
 332             return err
 333         }
 334 
 335         if t == json.Delim('}') {
 336             w.WriteByte('}')
 337             return nil
 338         }
 339 
 340         if i > 0 {
 341             _, err := w.WriteString(", ")
 342             if err != nil {
 343                 return errNoMoreOutput
 344             }
 345         }
 346 
 347         k, ok := t.(string)
 348         if !ok {
 349             return errors.New(`expected a string for a key-value pair`)
 350         }
 351 
 352         err = handleString(w, k)
 353         if err != nil {
 354             return err
 355         }
 356 
 357         w.WriteString(": ")
 358 
 359         t, err = dec.Token()
 360         if err == io.EOF {
 361             return errors.New(`expected a value for a key-value pair`)
 362         }
 363 
 364         err = handleToken(w, dec, t)
 365         if err != nil {
 366             return err
 367         }
 368     }
 369 
 370     // make the compiler happy
 371     return nil
 372 }
 373 
 374 // handleString handles strings for func handleToken, and keys for func
 375 // handleObject
 376 func handleString(w *bufio.Writer, s string) error {
 377     w.WriteByte('"')
 378     for i := range s {
 379         w.Write(escapedStringBytes[s[i]])
 380     }
 381     w.WriteByte('"')
 382     return nil
 383 }