File: jsonl.go
   1 /*
   2 The MIT License (MIT)
   3 
   4 Copyright © 2024 pacman64
   5 
   6 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7 this software and associated documentation files (the “Software”), to deal
   8 in the Software without restriction, including without limitation the rights to
   9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 of the Software, and to permit persons to whom the Software is furnished to do
  11 so, subject to the following conditions:
  12 
  13 The above copyright notice and this permission notice shall be included in all
  14 copies or substantial portions of the Software.
  15 
  16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 SOFTWARE.
  23 */
  24 
  25 /*
  26 Single-file source-code for jsonl: this version has no http(s) support. Even
  27 the unit-tests from the original jsonl are omitted.
  28 
  29 To compile a smaller-sized command-line app, you can use the `go` command as
  30 follows:
  31 
  32 go build -ldflags "-s -w" -trimpath jsonl.go
  33 */
  34 
  35 package main
  36 
  37 import (
  38     "bufio"
  39     "encoding/json"
  40     "errors"
  41     "io"
  42     "os"
  43 )
  44 
// Note: the code avoids the fmt package, which saves hundreds of kilobytes
// in the resulting executable: a noticeable difference.
  47 
// info is the help message shown when the first command-line argument is
// one of the help options: func main skips its leading newline by writing
// info[1:] instead of the whole string
//
// NOTE(review): the usage line mentions `[filepath...]`, but func main
// rejects more than one input; confirm which of the two is intended.
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`
  57 
// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully: the array/object handlers return it when one of their
// checked writes fails, and func main doesn't treat it as a failure
var errNoMoreOutput = errors.New(`no more output`)
  62 
  63 func main() {
  64     if len(os.Args) > 1 {
  65         switch os.Args[1] {
  66         case `-h`, `--h`, `-help`, `--help`:
  67             os.Stderr.WriteString(info[1:])
  68             return
  69         }
  70     }
  71 
  72     if len(os.Args) > 2 {
  73         const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
  74         os.Stderr.WriteString(msg)
  75         os.Exit(1)
  76     }
  77 
  78     // figure out whether input should come from a named file or from stdin
  79     path := `-`
  80     if len(os.Args) > 1 {
  81         path = os.Args[1]
  82     }
  83 
  84     err := handleInput(os.Stdout, path)
  85     if err != nil && err != io.EOF && err != errNoMoreOutput {
  86         os.Stderr.WriteString("\x1b[31m")
  87         os.Stderr.WriteString(err.Error())
  88         os.Stderr.WriteString("\x1b[0m\n")
  89         os.Exit(1)
  90     }
  91 }
  92 
  93 // handleInput simplifies control-flow for func main
  94 func handleInput(w io.Writer, path string) error {
  95     if path == `-` {
  96         return convert(w, os.Stdin)
  97     }
  98 
  99     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
 100     //  resp, err := http.Get(path)
 101     //  if err != nil {
 102     //      return err
 103     //  }
 104     //  defer resp.Body.Close()
 105     //  return convert(w, resp.Body)
 106     // }
 107 
 108     f, err := os.Open(path)
 109     if err != nil {
 110         // on windows, file-not-found error messages may mention `CreateFile`,
 111         // even when trying to open files in read-only mode
 112         return errors.New(`can't open file named ` + path)
 113     }
 114     defer f.Close()
 115     return convert(w, f)
 116 }
 117 
 118 // convert simplifies control-flow for func handleInput
 119 func convert(w io.Writer, r io.Reader) error {
 120     bw := bufio.NewWriter(w)
 121     defer bw.Flush()
 122     return jsonl(bw, r)
 123 }
 124 
 125 // escapedStringBytes helps func handleString treat all string bytes quickly
 126 // and correctly, using their officially-supported JSON escape sequences
 127 //
 128 // https://www.rfc-editor.org/rfc/rfc8259#section-7
 129 var escapedStringBytes = [256][]byte{
 130     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
 131     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
 132     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
 133     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
 134     {'\\', 'b'}, {'\\', 't'},
 135     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
 136     {'\\', 'f'}, {'\\', 'r'},
 137     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
 138     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
 139     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
 140     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
 141     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
 142     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
 143     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
 144     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
 145     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
 146     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
 147     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
 148     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
 149     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
 150     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
 151     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
 152     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
 153     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
 154     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
 155     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
 156     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
 157     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
 158     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
 159     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
 160     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
 161     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
 162     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
 163     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
 164     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
 165     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
 166     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
 167     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
 168     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
 169     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
 170     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
 171     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
 172     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
 173     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
 174 }
 175 
 176 // jsonl does it all, given a reader and a writer
 177 func jsonl(w *bufio.Writer, r io.Reader) error {
 178     dec := json.NewDecoder(r)
 179     // avoid parsing numbers, so unusually-long numbers are kept verbatim,
 180     // even if JSON parsers aren't required to guarantee such input-fidelity
 181     // for numbers
 182     dec.UseNumber()
 183 
 184     t, err := dec.Token()
 185     if err == io.EOF {
 186         // return errors.New(`input has no JSON values`)
 187         return nil
 188     }
 189 
 190     if t == json.Delim('[') {
 191         if err := handleTopLevelArray(w, dec); err != nil {
 192             return err
 193         }
 194     } else {
 195         if err := handleToken(w, dec, t); err != nil {
 196             return err
 197         }
 198         w.WriteByte('\n')
 199     }
 200 
 201     _, err = dec.Token()
 202     if err == io.EOF {
 203         // input is over, so it's a success
 204         return nil
 205     }
 206 
 207     if err == nil {
 208         // a successful `read` is a failure, as it means there are
 209         // trailing JSON tokens
 210         return errors.New(`unexpected trailing data`)
 211     }
 212 
 213     // any other error, perhaps some invalid-JSON-syntax-type error
 214     return err
 215 }
 216 
 217 // handleToken handles recursion for func json2
 218 func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
 219     switch t := t.(type) {
 220     case json.Delim:
 221         switch t {
 222         case json.Delim('['):
 223             return handleArray(w, dec)
 224         case json.Delim('{'):
 225             return handleObject(w, dec)
 226         default:
 227             return errors.New(`unsupported JSON syntax ` + string(t))
 228         }
 229 
 230     case nil:
 231         w.WriteString(`null`)
 232         return nil
 233 
 234     case bool:
 235         if t {
 236             w.WriteString(`true`)
 237         } else {
 238             w.WriteString(`false`)
 239         }
 240         return nil
 241 
 242     case json.Number:
 243         w.WriteString(t.String())
 244         return nil
 245 
 246     case string:
 247         return handleString(w, t)
 248 
 249     default:
 250         // return fmt.Errorf(`unsupported token type %T`, t)
 251         return errors.New(`invalid JSON token`)
 252     }
 253 }
 254 
 255 func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
 256     for i := 0; true; i++ {
 257         t, err := dec.Token()
 258         if err == io.EOF {
 259             return nil
 260         }
 261 
 262         if err != nil {
 263             return err
 264         }
 265 
 266         if t == json.Delim(']') {
 267             return nil
 268         }
 269 
 270         err = handleToken(w, dec, t)
 271         if err != nil {
 272             return err
 273         }
 274 
 275         if err := w.WriteByte('\n'); err != nil {
 276             return errNoMoreOutput
 277         }
 278     }
 279 
 280     // make the compiler happy
 281     return nil
 282 }
 283 
 284 // handleArray handles arrays for func handleToken
 285 func handleArray(w *bufio.Writer, dec *json.Decoder) error {
 286     w.WriteByte('[')
 287 
 288     for i := 0; true; i++ {
 289         t, err := dec.Token()
 290         if err == io.EOF {
 291             w.WriteByte(']')
 292             return nil
 293         }
 294 
 295         if err != nil {
 296             return err
 297         }
 298 
 299         if t == json.Delim(']') {
 300             w.WriteByte(']')
 301             return nil
 302         }
 303 
 304         if i > 0 {
 305             _, err := w.WriteString(", ")
 306             if err != nil {
 307                 return errNoMoreOutput
 308             }
 309         }
 310 
 311         err = handleToken(w, dec, t)
 312         if err != nil {
 313             return err
 314         }
 315     }
 316 
 317     // make the compiler happy
 318     return nil
 319 }
 320 
 321 // handleObject handles objects for func handleToken
 322 func handleObject(w *bufio.Writer, dec *json.Decoder) error {
 323     w.WriteByte('{')
 324 
 325     for i := 0; true; i++ {
 326         t, err := dec.Token()
 327         if err == io.EOF {
 328             w.WriteByte('}')
 329             return nil
 330         }
 331 
 332         if err != nil {
 333             return err
 334         }
 335 
 336         if t == json.Delim('}') {
 337             w.WriteByte('}')
 338             return nil
 339         }
 340 
 341         if i > 0 {
 342             _, err := w.WriteString(", ")
 343             if err != nil {
 344                 return errNoMoreOutput
 345             }
 346         }
 347 
 348         k, ok := t.(string)
 349         if !ok {
 350             return errors.New(`expected a string for a key-value pair`)
 351         }
 352 
 353         err = handleString(w, k)
 354         if err != nil {
 355             return err
 356         }
 357 
 358         w.WriteString(": ")
 359 
 360         t, err = dec.Token()
 361         if err == io.EOF {
 362             return errors.New(`expected a value for a key-value pair`)
 363         }
 364 
 365         err = handleToken(w, dec, t)
 366         if err != nil {
 367             return err
 368         }
 369     }
 370 
 371     // make the compiler happy
 372     return nil
 373 }
 374 
 375 // handleString handles strings for func handleToken, and keys for func
 376 // handleObject
 377 func handleString(w *bufio.Writer, s string) error {
 378     w.WriteByte('"')
 379     for i := range s {
 380         w.Write(escapedStringBytes[s[i]])
 381     }
 382     w.WriteByte('"')
 383     return nil
 384 }