/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for jsonl: this version has no http(s) support. Even
the unit-tests from the original jsonl are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath jsonl.go
*/

package main

import (
	"bufio"
	"encoding/json"
	"errors"
	"io"
	"os"
)

// Note: the code is avoiding using the fmt package to save hundreds of
// kilobytes on the resulting executable, which is a noticeable difference.

// info is the help message shown by the help options; its leading newline
// only makes the literal easier to read, and is skipped when printing.
const info = `
jsonl [options...] [filepath...]

JSON Lines turns valid JSON-input arrays into separate JSON lines, one for
each top-level item. Non-arrays result in a single JSON-line.

When not given a filepath to load, standard input is used instead. Every
output line is always a single top-level item from the input.
`

// errNoMoreOutput is a generic dummy output-error, which is meant to be
// ultimately ignored, being just an excuse to quit the app immediately
// and successfully
var errNoMoreOutput = errors.New(`no more output`)

// main handles the help options, rejects multiple inputs, and converts
// either the named file or standard input, exiting with status 1 on error.
func main() {
	if len(os.Args) > 1 {
		switch os.Args[1] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stderr.WriteString(info[1:])
			return
		}
	}

	if len(os.Args) > 2 {
		const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
	}

	// figure out whether input should come from a named file or from stdin
	path := `-`
	if len(os.Args) > 1 {
		path = os.Args[1]
	}

	err := handleInput(os.Stdout, path)
	if err != nil && err != io.EOF && err != errNoMoreOutput {
		os.Stderr.WriteString("\x1b[31m")
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\x1b[0m\n")
		os.Exit(1)
	}
}

// handleInput simplifies control-flow for func main: the path `-` means
// standard input, while anything else is opened as a file.
func handleInput(w io.Writer, path string) error {
	if path == `-` {
		return convert(w, os.Stdin)
	}

	// if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
	// 	resp, err := http.Get(path)
	// 	if err != nil {
	// 		return err
	// 	}
	// 	defer resp.Body.Close()
	// 	return convert(w, resp.Body)
	// }

	f, err := os.Open(path)
	if err != nil {
		// on windows, file-not-found error messages may mention `CreateFile`,
		// even when trying to open files in read-only mode
		return errors.New(`can't open file named ` + path)
	}
	defer f.Close()

	return convert(w, f)
}

// convert simplifies control-flow for func handleInput: it buffers the
// output, and flushes it when the conversion is over, whether it succeeded
// or not.
func convert(w io.Writer, r io.Reader) error {
	bw := bufio.NewWriter(w)
	defer bw.Flush()
	return jsonl(bw, r)
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences;
// the table is indexed by byte value, and bytes which need no escaping map
// to themselves
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = [256][]byte{
	{'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
	{'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
	{'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
	{'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
	{'\\', 'b'}, {'\\', 't'},
	{'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
	{'\\', 'f'}, {'\\', 'r'},
	{'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
	{'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
	{'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
	{'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
	{'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
	{'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
	{'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
	{'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
	{'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
	{32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
	{40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, {48}, {49},
	{50}, {51}, {52}, {53}, {54}, {55}, {56}, {57}, {58}, {59},
	{60}, {61}, {62}, {63}, {64}, {65}, {66}, {67}, {68}, {69},
	{70}, {71}, {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
	{80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, {88}, {89},
	{90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, {96}, {97}, {98}, {99},
	{100}, {101}, {102}, {103}, {104}, {105}, {106}, {107}, {108}, {109},
	{110}, {111}, {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
	{120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, {128}, {129},
	{130}, {131}, {132}, {133}, {134}, {135}, {136}, {137}, {138}, {139},
	{140}, {141}, {142}, {143}, {144}, {145}, {146}, {147}, {148}, {149},
	{150}, {151}, {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
	{160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, {168}, {169},
	{170}, {171}, {172}, {173}, {174}, {175}, {176}, {177}, {178}, {179},
	{180}, {181}, {182}, {183}, {184}, {185}, {186}, {187}, {188}, {189},
	{190}, {191}, {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
	{200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, {208}, {209},
	{210}, {211}, {212}, {213}, {214}, {215}, {216}, {217}, {218}, {219},
	{220}, {221}, {222}, {223}, {224}, {225}, {226}, {227}, {228}, {229},
	{230}, {231}, {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
	{240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, {248}, {249},
	{250}, {251}, {252}, {253}, {254}, {255},
}

// jsonl does it all, given a reader and a writer: a top-level array is
// emitted as one line per item, any other top-level value as a single line,
// and empty input results in no output at all. Anything following the first
// top-level value is reported as an error.
func jsonl(w *bufio.Writer, r io.Reader) error {
	dec := json.NewDecoder(r)
	// avoid parsing numbers, so unusually-long numbers are kept verbatim,
	// even if JSON parsers aren't required to guarantee such input-fidelity
	// for numbers
	dec.UseNumber()

	t, err := dec.Token()
	if err == io.EOF {
		// return errors.New(`input has no JSON values`)
		return nil
	}
	if err != nil {
		// without this check, a failed read would leave a nil token, which
		// would wrongly be emitted as a `null` line
		return err
	}

	if t == json.Delim('[') {
		if err := handleTopLevelArray(w, dec); err != nil {
			return err
		}
	} else {
		if err := handleToken(w, dec, t); err != nil {
			return err
		}
		if err := w.WriteByte('\n'); err != nil {
			return errNoMoreOutput
		}
	}

	_, err = dec.Token()
	if err == io.EOF {
		// input is over, so it's a success
		return nil
	}

	if err == nil {
		// a successful `read` is a failure, as it means there are
		// trailing JSON tokens
		return errors.New(`unexpected trailing data`)
	}

	// any other error, perhaps some invalid-JSON-syntax-type error
	return err
}

// handleToken handles recursion for func jsonl: it emits the value the
// token given starts, reading more tokens when that value is an array or
// an object.
func handleToken(w *bufio.Writer, dec *json.Decoder, t json.Token) error {
	switch t := t.(type) {
	case json.Delim:
		switch t {
		case json.Delim('['):
			return handleArray(w, dec)
		case json.Delim('{'):
			return handleObject(w, dec)
		default:
			return errors.New(`unsupported JSON syntax ` + string(t))
		}

	case nil:
		w.WriteString(`null`)
		return nil

	case bool:
		if t {
			w.WriteString(`true`)
		} else {
			w.WriteString(`false`)
		}
		return nil

	case json.Number:
		// numbers are emitted using their original text from the input
		w.WriteString(t.String())
		return nil

	case string:
		return handleString(w, t)

	default:
		// return fmt.Errorf(`unsupported token type %T`, t)
		return errors.New(`invalid JSON token`)
	}
}

// handleTopLevelArray emits each item of the top-level array on its own
// line, until the array closes or the input ends. A failed line-ending
// write results in errNoMoreOutput.
func handleTopLevelArray(w *bufio.Writer, dec *json.Decoder) error {
	for {
		t, err := dec.Token()
		if err == io.EOF {
			// input ending before the array closes is tolerated
			return nil
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			return nil
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}

		if err := w.WriteByte('\n'); err != nil {
			return errNoMoreOutput
		}
	}
}

// handleArray handles arrays for func handleToken: items are emitted on the
// same line, separated by a comma and a space.
func handleArray(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('[')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			// input ending before the array closes is tolerated, and the
			// array is closed on the output anyway
			w.WriteByte(']')
			return nil
		}
		if err != nil {
			return err
		}

		if t == json.Delim(']') {
			w.WriteByte(']')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return errNoMoreOutput
			}
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleObject handles objects for func handleToken: key-value pairs are
// emitted on the same line, separated by a comma and a space.
func handleObject(w *bufio.Writer, dec *json.Decoder) error {
	w.WriteByte('{')

	for i := 0; ; i++ {
		t, err := dec.Token()
		if err == io.EOF {
			// input ending before the object closes is tolerated, and the
			// object is closed on the output anyway
			w.WriteByte('}')
			return nil
		}
		if err != nil {
			return err
		}

		if t == json.Delim('}') {
			w.WriteByte('}')
			return nil
		}

		if i > 0 {
			if _, err := w.WriteString(", "); err != nil {
				return errNoMoreOutput
			}
		}

		k, ok := t.(string)
		if !ok {
			return errors.New(`expected a string for a key-value pair`)
		}

		if err := handleString(w, k); err != nil {
			return err
		}

		w.WriteString(": ")

		t, err = dec.Token()
		if err == io.EOF {
			return errors.New(`expected a value for a key-value pair`)
		}
		if err != nil {
			// without this check, a failed read would leave a nil token,
			// which would wrongly be emitted as a `null` value
			return err
		}

		if err := handleToken(w, dec, t); err != nil {
			return err
		}
	}
}

// handleString handles strings for func handleToken, and keys for func
// handleObject: it emits the string given as a double-quoted JSON string,
// escaping bytes via the escapedStringBytes table. A failed write results
// in errNoMoreOutput.
func handleString(w *bufio.Writer, s string) error {
	w.WriteByte('"')

	// loop over every byte: ranging over the string would step a whole
	// rune at a time, emitting only the first byte of multi-byte runes
	for i := 0; i < len(s); i++ {
		w.Write(escapedStringBytes[s[i]])
	}

	// errors from a bufio.Writer are sticky, so checking the last write
	// is enough to detect any previous failure
	if err := w.WriteByte('"'); err != nil {
		return errNoMoreOutput
	}
	return nil
}