File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2020-2025 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for jsonl: this version has no http(s) support. Even
  27 the unit-tests from the original jsonl are omitted.
  28 
  29 To compile a smaller-sized command-line app, you can use the `go` command as
  30 follows:
  31 
  32 go build -ldflags "-s -w" -trimpath jsonl.go
  33 */
  34 
  35 package main
  36 
  37 import (
  38     "bufio"
  39     "encoding/json"
  40     "errors"
  41     "io"
  42     "os"
  43 )
  44 
  45 // Note: the code is avoiding using the fmt package to save hundreds of
  46 // kilobytes on the resulting executable, which is a noticeable difference.
  47 
  48 const info = `
  49 jsonl [options...] [filepath...]
  50 
  51 JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
  52 each top-level item. Non-arrays result in a single JSON-line.
  53 
  54 When not given a filepath to load, standard input is used instead. Every
  55 output line is always a single top-level item from the input.
  56 `
  57 
  58 // errNoMoreOutput is a generic dummy output-error, which is meant to be
  59 // ultimately ignored, being just an excuse to quit the app immediately
  60 // and successfully
  61 var errNoMoreOutput = errors.New(`no more output`)
  62 
  63 func main() {
  64     if len(os.Args) > 1 {
  65         switch os.Args[1] {
  66         case `-h`, `--h`, `-help`, `--help`:
  67             os.Stderr.WriteString(info[1:])
  68             return
  69         }
  70     }
  71 
  72     if len(os.Args) > 2 {
  73         const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
  74         os.Stderr.WriteString(msg)
  75         os.Exit(1)
  76     }
  77 
  78     // figure out whether input should come from a named file or from stdin
  79     path := `-`
  80     if len(os.Args) > 1 {
  81         path = os.Args[1]
  82     }
  83 
  84     err := handleInput(os.Stdout, path)
  85     if err != nil && err != io.EOF && err != errNoMoreOutput {
  86         os.Stderr.WriteString("\x1b[31m")
  87         os.Stderr.WriteString(err.Error())
  88         os.Stderr.WriteString("\x1b[0m\n")
  89         os.Exit(1)
  90     }
  91 }
  92 
  93 // handleInput simplifies control-flow for func main
  94 func handleInput(w io.Writer, path string) error {
  95     if path == `-` {
  96         return convert(w, os.Stdin)
  97     }
  98 
  99     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
 100     //  resp, err := http.Get(path)
 101     //  if err != nil {
 102     //      return err
 103     //  }
 104     //  defer resp.Body.Close()
 105     //  return convert(w, resp.Body)
 106     // }
 107 
 108     f, err := os.Open(path)
 109     if err != nil {
 110         // on windows, file-not-found error messages may mention `CreateFile`,
 111         // even when trying to open files in read-only mode
 112         return errors.New(`can't open file named ` + path)
 113     }
 114     defer f.Close()
 115     return convert(w, f)
 116 }
 117 
 118 // convert simplifies control-flow for func handleInput
 119 func convert(w io.Writer, r io.Reader) error {
 120     bw := bufio.NewWriter(w)
 121     defer bw.Flush()
 122     return jsonl(bw, r)
 123 }
 124 
 125 // escapedStringBytes helps func handleString treat all string bytes quickly
 126 // and correctly, using their officially-supported JSON escape sequences
 127 //
 128 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 129 var escapedStringBytes = [256][]byte{
 130     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 131     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 132     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 133     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 134     {'\\', 'b'}, {'\\', 't'},
 135     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 136     {'\\', 'f'}, {'\\', 'r'},
 137     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 138     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 139     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 140     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 141     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 142     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 143     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 144     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 145     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 146     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 147     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 148     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 149     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 150     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 151     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 152     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 153     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 154     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 155     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 156     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 157     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 158     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 159     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 160     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 161     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 162     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 163     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 164     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 165     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 166     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 167     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 168     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 169     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 170     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 171     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 172     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 173     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 174 }
 175 
 176 // jsonl does it all, given a reader and a writer
 177 func jsonl(w *bufio.Writer, r io.Reader) error {
 178     dec := json.NewDecoder(r)
 179     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 180     // even if JSON parsers aren't required to guarantee such input-fidelity
 181     // for numbers
 182     dec.UseNumber()
 183 
 184     t, err := dec.Token()
 185     if err == io.EOF {
 186         // return errors.New(`input has no JSON values`)
 187         return nil
 188     }
 189 
 190     if t == json.Delim('[') {
 191         if err := handleTopLevelArray(w, dec); err != nil {
 192             return err
 193         }
 194     } else {
 195         if err := handleToken(w, dec, t); err != nil {
 196             return err
 197         }
 198         w.WriteByte('\n')
 199     }
 200 
 201     _, err = dec.Token()
 202     if err == io.EOF {
 203         // input is over, so it's a success
 204         return nil
 205     }
 206 
 207     if err == nil {
 208         // a successful `read` is a failure, as it means there are
 209         // trailing JSON tokens
 210         return errors.New(`unexpected trailing data`)
 211     }
 212 
 213     // any other error, perhaps some invalid-JSON-syntax-type error
 214     return err
 215 }
 216 
 217 // handleToken handles recursion for func json2
 218 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 219     switch t := t.(type) {
 220     case json.Delim:
 221         switch t {
 222         case json.Delim('['):
 223             return handleArray(w, dec)
 224         case json.Delim('{'):
 225             return handleObject(w, dec)
 226         default:
 227             return errors.New(`unsupported JSON syntax ` + string(t))
 228         }
 229 
 230     case nil:
 231         w.WriteString(`null`)
 232         return nil
 233 
 234     case bool:
 235         if t {
 236             w.WriteString(`true`)
 237         } else {
 238             w.WriteString(`false`)
 239         }
 240         return nil
 241 
 242     case json.Number:
 243         w.WriteString(t.String())
 244         return nil
 245 
 246     case string:
 247         return handleString(w, t)
 248 
 249     default:
 250         // return fmt.Errorf(`unsupported token type %T`, t)
 251         return errors.New(`invalid JSON token`)
 252     }
 253 }
 254 
 255 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
 256     for i := 0; true; i++ {
 257         t, err := dec.Token()
 258         if err == io.EOF {
 259             return nil
 260         }
 261 
 262         if err != nil {
 263             return err
 264         }
 265 
 266         if t == json.Delim(']') {
 267             return nil
 268         }
 269 
 270         err = handleToken(w, dec, t)
 271         if err != nil {
 272             return err
 273         }
 274 
 275         w.WriteByte('\n')
 276         if err := w.Flush(); err != nil {
 277             // a write error may be the consequence of stdout being closed,
 278             // perhaps by another app along a pipe
 279             return errNoMoreOutput
 280         }
 281     }
 282 
 283     // make the compiler happy
 284     return nil
 285 }
 286 
 287 // handleArray handles arrays for func handleToken
 288 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 289     w.WriteByte('[')
 290 
 291     for i := 0; true; i++ {
 292         t, err := dec.Token()
 293         if err == io.EOF {
 294             w.WriteByte(']')
 295             return nil
 296         }
 297 
 298         if err != nil {
 299             return err
 300         }
 301 
 302         if t == json.Delim(']') {
 303             w.WriteByte(']')
 304             return nil
 305         }
 306 
 307         if i > 0 {
 308             _, err := w.WriteString(", ")
 309             if err != nil {
 310                 return errNoMoreOutput
 311             }
 312         }
 313 
 314         err = handleToken(w, dec, t)
 315         if err != nil {
 316             return err
 317         }
 318     }
 319 
 320     // make the compiler happy
 321     return nil
 322 }
 323 
 324 // handleObject handles objects for func handleToken
 325 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 326     w.WriteByte('{')
 327 
 328     for i := 0; true; i++ {
 329         t, err := dec.Token()
 330         if err == io.EOF {
 331             w.WriteByte('}')
 332             return nil
 333         }
 334 
 335         if err != nil {
 336             return err
 337         }
 338 
 339         if t == json.Delim('}') {
 340             w.WriteByte('}')
 341             return nil
 342         }
 343 
 344         if i > 0 {
 345             _, err := w.WriteString(", ")
 346             if err != nil {
 347                 return errNoMoreOutput
 348             }
 349         }
 350 
 351         k, ok := t.(string)
 352         if !ok {
 353             return errors.New(`expected a string for a key-value pair`)
 354         }
 355 
 356         err = handleString(w, k)
 357         if err != nil {
 358             return err
 359         }
 360 
 361         w.WriteString(": ")
 362 
 363         t, err = dec.Token()
 364         if err == io.EOF {
 365             return errors.New(`expected a value for a key-value pair`)
 366         }
 367 
 368         err = handleToken(w, dec, t)
 369         if err != nil {
 370             return err
 371         }
 372     }
 373 
 374     // make the compiler happy
 375     return nil
 376 }
 377 
 378 // handleString handles strings for func handleToken, and keys for func
 379 // handleObject
 380 func handleString(w *bufio.Writer, s string) error {
 381     w.WriteByte('"')
 382     for i := range s {
 383         w.Write(escapedStringBytes[s[i]])
 384     }
 385     w.WriteByte('"')
 386     return nil
 387 }