File: j0/errors.go
   1 package main
   2 
   3 import (
   4     "errors"
   5     "io"
   6     "strconv"
   7 )
   8 
   9 var (
  10     errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
  11     errInputEarlyEnd   = errors.New(`expected end of input data`)
  12     errInvalidComment  = errors.New(`expected / or *`)
  13     errInvalidHex      = errors.New(`expected a base-16 digit`)
  14     errInvalidToken    = errors.New(`invalid JSON token`)
  15     errNoDigits        = errors.New(`expected numeric digits`)
  16     errNoStringQuote   = errors.New(`expected " or '`)
  17     errNoArrayComma    = errors.New(`missing comma between array values`)
  18     errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
  19     errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
  20     errExtraBytes      = errors.New(`unexpected extra input bytes`)
  21 
  22     // errNoMoreOutput is a generic dummy output-error, which is meant to be
  23     // ultimately ignored, being just an excuse to quit the app immediately
  24     // and successfully
  25     errNoMoreOutput = errors.New(`no more output`)
  26 )
  27 
  28 // isActualError is to figure out whether not to ignore an error, and thus
  29 // show it as an error message
  30 func isActualError(err error) bool {
  31     return err != nil && err != io.EOF && err != errNoMoreOutput
  32 }
  33 
  34 // linePosError is a more descriptive kind of error, showing the source of
  35 // the input-related problem, as 1-based a line/pos number pair in front
  36 // of the error message
  37 type linePosError struct {
  38     // line is the 1-based line count from the input
  39     line int
  40 
  41     // pos is the 1-based `horizontal` position in its line
  42     pos int
  43 
  44     // err is the error message to `decorate` with the position info
  45     err error
  46 }
  47 
  48 // Error satisfies the error interface
  49 func (lpe linePosError) Error() string {
  50     where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
  51     return where + `: ` + lpe.err.Error()
  52 }

     File: j0/go.mod
   1 module j0
   2 
   3 go 1.18

     File: j0/info.txt
   1 j0 [filepath...]
   2 
   3 Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
   4 
   5 Besides minimizing bytes, this tool also adapts almost-JSON input into
   6 valid JSON, since it ignores comments and trailing commas, neither of
   7 which are supported in JSON, but which are still commonly used.
   8 
   9 Output is always a single line, which ends with a line-feed.

     File: j0/json.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "errors"
   7     "io"
   8 )
   9 
  10 // isIdentifier improves control-flow of func handleKey, when it handles
  11 // unquoted object keys
  12 var isIdentifier = [256]bool{
  13     '_': true,
  14 
  15     '0': true, '1': true, '2': true, '3': true, '4': true,
  16     '5': true, '6': true, '7': true, '8': true, '9': true,
  17 
  18     'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
  19     'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
  20     'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
  21     'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  22     'Y': true, 'Z': true,
  23 
  24     'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
  25     'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
  26     'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
  27     's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  28     'y': true, 'z': true,
  29 }
  30 
  31 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not
  32 // being 0, and normalizes letter-case for the hex letters
  33 var matchHex = [256]byte{
  34     '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
  35     '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
  36     'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
  37     'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
  38 }
  39 
  40 // escapedStringBytes helps func handleString treat all string bytes quickly
  41 // and correctly, using their officially-supported JSON escape sequences
  42 //
  43 // https://www.rfc-editor.org/rfc/rfc8259#section-7
  44 var escapedStringBytes = [256][]byte{
  45     {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'},
  46     {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'},
  47     {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'},
  48     {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'},
  49     {'\\', 'b'}, {'\\', 't'},
  50     {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'},
  51     {'\\', 'f'}, {'\\', 'r'},
  52     {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'},
  53     {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'},
  54     {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'},
  55     {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'},
  56     {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'},
  57     {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'},
  58     {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'},
  59     {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'},
  60     {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'},
  61     {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39},
  62     {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47},
  63     {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55},
  64     {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63},
  65     {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71},
  66     {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79},
  67     {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87},
  68     {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95},
  69     {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103},
  70     {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111},
  71     {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119},
  72     {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127},
  73     {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135},
  74     {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143},
  75     {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151},
  76     {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159},
  77     {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167},
  78     {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175},
  79     {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183},
  80     {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191},
  81     {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199},
  82     {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207},
  83     {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215},
  84     {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223},
  85     {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231},
  86     {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239},
  87     {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247},
  88     {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255},
  89 }
  90 
  91 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON; this func
  92 // avoids writing a trailing line-feed, leaving that up to its caller
  93 func json0(w *bufio.Writer, r *bufio.Reader) error {
  94     jr := jsonReader{r, 1, 1}
  95 
  96     // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
  97     // mark) gives no useful info if present, as UTF-8 leaves no ambiguity
  98     // about byte-order by design
  99     jr.skipUTF8BOM()
 100 
 101     // ignore leading whitespace and/or comments
 102     if err := jr.seekNext(); err != nil {
 103         return err
 104     }
 105 
 106     // handle a single top-level JSON value
 107     if err := handleValue(w, &jr); err != nil {
 108         return err
 109     }
 110 
 111     // ignore trailing whitespace and/or comments
 112     if err := jr.seekNext(); err != nil {
 113         return err
 114     }
 115 
 116     // beyond trailing whitespace and/or comments, any more bytes
 117     // make the whole input data invalid JSON
 118     if _, ok := jr.peekByte(); ok {
 119         return jr.improveError(errExtraBytes)
 120     }
 121     return nil
 122 }
 123 
 124 // jsonReader reads data via a buffer, keeping track of the input position:
 125 // this in turn allows showing much more useful errors, when these happen
 126 type jsonReader struct {
 127     // r is the actual reader
 128     r *bufio.Reader
 129 
 130     // line is the 1-based line-counter for input bytes, and gives errors
 131     // useful position info
 132     line int
 133 
 134     // pos is the 1-based `horizontal` position in its line, and gives
 135     // errors useful position info
 136     pos int
 137 }
 138 
 139 // improveError makes any error more useful, by giving it info about the
 140 // current input-position, as a 1-based line/within-line-position pair
 141 func (jr jsonReader) improveError(err error) error {
 142     if _, ok := err.(linePosError); ok {
 143         return err
 144     }
 145 
 146     if err == io.EOF {
 147         return linePosError{jr.line, jr.pos, errInputEarlyEnd}
 148     }
 149     if err != nil {
 150         return linePosError{jr.line, jr.pos, err}
 151     }
 152     return nil
 153 }
 154 
 155 // demandSyntax fails with an error when the next byte isn't the one given;
 156 // when it is, the byte is then read/skipped, and a nil error is returned
 157 func (jr *jsonReader) demandSyntax(syntax byte) error {
 158     chunk, err := jr.r.Peek(1)
 159     if err == io.EOF {
 160         return jr.improveError(errInputEarlyEnd)
 161     }
 162     if err != nil {
 163         return jr.improveError(err)
 164     }
 165 
 166     if len(chunk) < 1 || chunk[0] != syntax {
 167         msg := `expected ` + string(rune(syntax))
 168         return jr.improveError(errors.New(msg))
 169     }
 170 
 171     jr.readByte()
 172     return nil
 173 }
 174 
 175 // updatePosInfo does what it says, given the byte just read separately
 176 func (jr *jsonReader) updatePosInfo(b byte) {
 177     if b == '\n' {
 178         jr.line += 1
 179         jr.pos = 1
 180     } else {
 181         jr.pos++
 182     }
 183 }
 184 
 185 // peekByte simplifies control-flow for various other funcs
 186 func (jr jsonReader) peekByte() (b byte, ok bool) {
 187     chunk, err := jr.r.Peek(1)
 188     if err == nil && len(chunk) >= 1 {
 189         return chunk[0], true
 190     }
 191     return 0, false
 192 }
 193 
 194 // readByte does what it says, updating the reader's position info
 195 func (jr *jsonReader) readByte() (b byte, err error) {
 196     b, err = jr.r.ReadByte()
 197     if err == nil {
 198         jr.updatePosInfo(b)
 199         return b, nil
 200     }
 201     return b, jr.improveError(err)
 202 }
 203 
 204 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols
 205 // and comments, either single-line (starting with //) or general (starting
 206 // with /* and ending with */)
 207 func (jr *jsonReader) seekNext() error {
 208     for {
 209         b, ok := jr.peekByte()
 210         if !ok {
 211             return nil
 212         }
 213 
 214         // case ' ', '\t', '\f', '\v', '\r', '\n':
 215         if b <= 32 {
 216             // keep skipping whitespace bytes
 217             b, _ := jr.readByte()
 218             jr.updatePosInfo(b)
 219             continue
 220         }
 221 
 222         if b != '/' {
 223             // reached the next token
 224             return nil
 225         }
 226 
 227         if err := jr.skipComment(); err != nil {
 228             return err
 229         }
 230 
 231         // after comments, keep looking for more whitespace and/or comments
 232     }
 233 }
 234 
 235 // skipComment helps func seekNext skip over comments, simplifying the latter
 236 // func's control-flow
 237 func (jr *jsonReader) skipComment() error {
 238     err := jr.demandSyntax('/')
 239     if err != nil {
 240         return err
 241     }
 242 
 243     b, ok := jr.peekByte()
 244     if !ok {
 245         return jr.improveError(errInputEarlyEnd)
 246     }
 247 
 248     switch b {
 249     case '/':
 250         // handle single-line comments
 251         return jr.skipLine()
 252 
 253     case '*':
 254         // handle (potentially) multi-line comments
 255         return jr.skipGeneralComment()
 256 
 257     default:
 258         return jr.improveError(errInvalidComment)
 259     }
 260 }
 261 
 262 // skipLine handles single-line comments for func skipComment
 263 func (jr *jsonReader) skipLine() error {
 264     for {
 265         b, err := jr.r.ReadByte()
 266         if err == io.EOF {
 267             // end of input is fine in this case
 268             return nil
 269         }
 270         if err != nil {
 271             return err
 272         }
 273 
 274         jr.updatePosInfo(b)
 275         if b == '\n' {
 276             jr.line++
 277             return nil
 278         }
 279     }
 280 }
 281 
 282 // skipGeneralComment handles (potentially) multi-line comments for func
 283 // skipComment
 284 func (jr *jsonReader) skipGeneralComment() error {
 285     var prev byte
 286     for {
 287         b, err := jr.readByte()
 288         if err != nil {
 289             return jr.improveError(errCommentEarlyEnd)
 290         }
 291 
 292         if prev == '*' && b == '/' {
 293             return nil
 294         }
 295         if b == '\n' {
 296             jr.line++
 297         }
 298         prev = b
 299     }
 300 }
 301 
 302 // skipUTF8BOM does what it says, if a UTF-8 BOM is present
 303 func (jr *jsonReader) skipUTF8BOM() {
 304     lead, err := jr.r.Peek(3)
 305     if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
 306         jr.readByte()
 307         jr.readByte()
 308         jr.readByte()
 309         jr.pos += 3
 310     }
 311 }
 312 
 313 // outputByte is a small wrapper on func WriteByte, which adapts any error
 314 // into a custom dummy output-error, which is in turn meant to be ignored,
 315 // being just an excuse to quit the app immediately and successfully
 316 func outputByte(w *bufio.Writer, b byte) error {
 317     err := w.WriteByte(b)
 318     if err == nil {
 319         return nil
 320     }
 321     return errNoMoreOutput
 322 }
 323 
 324 // handleArray handles arrays for func handleValue
 325 func handleArray(w *bufio.Writer, jr *jsonReader) error {
 326     if err := jr.demandSyntax('['); err != nil {
 327         return err
 328     }
 329     w.WriteByte('[')
 330 
 331     for n := 0; true; n++ {
 332         // there may be whitespace/comments before the next comma
 333         if err := jr.seekNext(); err != nil {
 334             return err
 335         }
 336 
 337         // handle commas between values, as well as trailing ones
 338         comma := false
 339         b, _ := jr.peekByte()
 340         if b == ',' {
 341             jr.readByte()
 342             comma = true
 343 
 344             // there may be whitespace/comments before an ending ']'
 345             if err := jr.seekNext(); err != nil {
 346                 return err
 347             }
 348             b, _ = jr.peekByte()
 349         }
 350 
 351         // handle end of array
 352         if b == ']' {
 353             jr.readByte()
 354             w.WriteByte(']')
 355             return nil
 356         }
 357 
 358         // don't forget commas between adjacent values
 359         if n > 0 {
 360             if !comma {
 361                 return errNoArrayComma
 362             }
 363             if err := outputByte(w, ','); err != nil {
 364                 return err
 365             }
 366         }
 367 
 368         // handle the next value
 369         if err := jr.seekNext(); err != nil {
 370             return err
 371         }
 372         if err := handleValue(w, jr); err != nil {
 373             return err
 374         }
 375     }
 376 
 377     // make the compiler happy
 378     return nil
 379 }
 380 
 381 // handleDigits helps various number-handling funcs do their job
 382 func handleDigits(w *bufio.Writer, jr *jsonReader) error {
 383     for n := 0; true; n++ {
 384         b, _ := jr.peekByte()
 385 
 386         // support `nice` long numbers by ignoring their underscores
 387         if b == '_' {
 388             jr.readByte()
 389             continue
 390         }
 391 
 392         if '0' <= b && b <= '9' {
 393             jr.readByte()
 394             w.WriteByte(b)
 395             continue
 396         }
 397 
 398         if n == 0 {
 399             return errNoDigits
 400         }
 401         return nil
 402     }
 403 
 404     // make the compiler happy
 405     return nil
 406 }
 407 
 408 // handleDot handles pseudo-JSON numbers which start with a decimal dot
 409 func handleDot(w *bufio.Writer, jr *jsonReader) error {
 410     if err := jr.demandSyntax('.'); err != nil {
 411         return err
 412     }
 413     w.Write([]byte{'0', '.'})
 414     return handleDigits(w, jr)
 415 }
 416 
 417 // handleKey is used by func handleObjects and generalizes func handleString,
 418 // by allowing unquoted object keys; it's not used anywhere else, as allowing
 419 // unquoted string values is ambiguous with actual JSON-keyword values null,
 420 // false, and true.
 421 func handleKey(w *bufio.Writer, jr *jsonReader) error {
 422     quote, ok := jr.peekByte()
 423     if quote == '"' || quote == '\'' {
 424         return handleString(w, jr)
 425     }
 426     if !ok {
 427         return jr.improveError(errStringEarlyEnd)
 428     }
 429 
 430     w.WriteByte('"')
 431     for {
 432         if b, _ := jr.peekByte(); isIdentifier[b] {
 433             jr.readByte()
 434             w.WriteByte(b)
 435             continue
 436         }
 437 
 438         w.WriteByte('"')
 439         return nil
 440     }
 441 }
 442 
 443 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes
 444 // are unescaped ASCII symbols: this is a very common case for strings, and is
 445 // almost always the case for object keys; returns whether it succeeded, so
 446 // this func's caller knows knows if it needs to do anything, the slower way
 447 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
 448     chunk, _ := jr.r.Peek(64)
 449 
 450     for i, b := range chunk {
 451         if b < 32 || b > 127 || b == '\\' {
 452             return false
 453         }
 454         if b != quote {
 455             continue
 456         }
 457 
 458         // bulk-writing the chunk is this func's whole point
 459         w.WriteByte('"')
 460         w.Write(chunk[:i])
 461         w.WriteByte('"')
 462 
 463         jr.r.Discard(i + 1)
 464         return true
 465     }
 466 
 467     // maybe the inner-string is ok, but it's just longer than the chunk
 468     return false
 469 }
 470 
 471 // handleKeyword is used by funcs handleFalse, handleNull, and handleTrue
 472 func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
 473     for rest := kw; len(rest) > 0; rest = rest[1:] {
 474         b, err := jr.readByte()
 475         if err == nil && b == rest[0] {
 476             // keywords given to this func have no line-feeds
 477             jr.pos++
 478             continue
 479         }
 480 
 481         msg := `expected JSON value ` + string(kw)
 482         return jr.improveError(errors.New(msg))
 483     }
 484 
 485     w.Write(kw)
 486     return nil
 487 }
 488 
 489 // handleNegative handles numbers starting with a negative sign for func
 490 // handleValue
 491 func handleNegative(w *bufio.Writer, jr *jsonReader) error {
 492     if err := jr.demandSyntax('-'); err != nil {
 493         return err
 494     }
 495 
 496     w.WriteByte('-')
 497     if b, _ := jr.peekByte(); b == '.' {
 498         jr.readByte()
 499         w.Write([]byte{'0', '.'})
 500         return handleDigits(w, jr)
 501     }
 502     return handleNumber(w, jr)
 503 }
 504 
 505 // handleNumber handles numeric values/tokens, including invalid-JSON cases,
 506 // such as values starting with a decimal dot
 507 func handleNumber(w *bufio.Writer, jr *jsonReader) error {
 508     // handle integer digits
 509     if err := handleDigits(w, jr); err != nil {
 510         return err
 511     }
 512 
 513     // handle optional decimal digits, starting with a leading dot
 514     if b, _ := jr.peekByte(); b == '.' {
 515         jr.readByte()
 516         w.WriteByte('.')
 517         return handleDigits(w, jr)
 518     }
 519     return nil
 520 }
 521 
 522 // handleObject handles objects for func handleValue
 523 func handleObject(w *bufio.Writer, jr *jsonReader) error {
 524     if err := jr.demandSyntax('{'); err != nil {
 525         return err
 526     }
 527     w.WriteByte('{')
 528 
 529     for npairs := 0; true; npairs++ {
 530         // there may be whitespace/comments before the next comma
 531         if err := jr.seekNext(); err != nil {
 532             return err
 533         }
 534 
 535         // handle commas between key-value pairs, as well as trailing ones
 536         comma := false
 537         b, _ := jr.peekByte()
 538         if b == ',' {
 539             jr.readByte()
 540             comma = true
 541 
 542             // there may be whitespace/comments before an ending '}'
 543             if err := jr.seekNext(); err != nil {
 544                 return err
 545             }
 546             b, _ = jr.peekByte()
 547         }
 548 
 549         // handle end of object
 550         if b == '}' {
 551             jr.readByte()
 552             w.WriteByte('}')
 553             return nil
 554         }
 555 
 556         // don't forget commas between adjacent key-value pairs
 557         if npairs > 0 {
 558             if !comma {
 559                 return errNoObjectComma
 560             }
 561             if err := outputByte(w, ','); err != nil {
 562                 return err
 563             }
 564         }
 565 
 566         // handle the next pair's key
 567         if err := jr.seekNext(); err != nil {
 568             return err
 569         }
 570         if err := handleKey(w, jr); err != nil {
 571             return err
 572         }
 573 
 574         // demand a colon right after the key
 575         if err := jr.seekNext(); err != nil {
 576             return err
 577         }
 578         if err := jr.demandSyntax(':'); err != nil {
 579             return err
 580         }
 581         w.WriteByte(':')
 582 
 583         // handle the next pair's value
 584         if err := jr.seekNext(); err != nil {
 585             return err
 586         }
 587         if err := handleValue(w, jr); err != nil {
 588             return err
 589         }
 590     }
 591 
 592     // make the compiler happy
 593     return nil
 594 }
 595 
 596 // handlePositive handles numbers starting with a positive sign for func
 597 // handleValue
 598 func handlePositive(w *bufio.Writer, jr *jsonReader) error {
 599     if err := jr.demandSyntax('+'); err != nil {
 600         return err
 601     }
 602 
 603     // valid JSON isn't supposed to have leading pluses on numbers, so
 604     // emit nothing for it, unlike for negative numbers
 605 
 606     if b, _ := jr.peekByte(); b == '.' {
 607         jr.readByte()
 608         w.Write([]byte{'0', '.'})
 609         return handleDigits(w, jr)
 610     }
 611     return handleNumber(w, jr)
 612 }
 613 
 614 // handleString handles strings for funcs handleValue and handleObject, and
 615 // supports both single-quotes and double-quotes, always emitting the latter
 616 // in the output, of course
 617 func handleString(w *bufio.Writer, jr *jsonReader) error {
 618     quote, ok := jr.peekByte()
 619     if !ok || (quote != '"' && quote != '\'') {
 620         return errNoStringQuote
 621     }
 622 
 623     jr.readByte()
 624     // try the quicker all-unescaped-ASCII handler
 625     if trySimpleInner(w, jr, quote) {
 626         return nil
 627     }
 628 
 629     // it's a non-trivial inner-string, so handle it byte-by-byte
 630     w.WriteByte('"')
 631     escaped := false
 632 
 633     for {
 634         b, err := jr.r.ReadByte()
 635         if err != nil {
 636             if err == io.EOF {
 637                 return jr.improveError(errStringEarlyEnd)
 638             }
 639             return jr.improveError(err)
 640         }
 641 
 642         if !escaped {
 643             if b == '\\' {
 644                 escaped = true
 645                 continue
 646             }
 647 
 648             // handle end of string
 649             if b == quote {
 650                 return outputByte(w, '"')
 651             }
 652 
 653             w.Write(escapedStringBytes[b])
 654             jr.updatePosInfo(b)
 655             continue
 656         }
 657 
 658         // handle escaped items
 659         escaped = false
 660 
 661         switch b {
 662         case 'u':
 663             // \u needs exactly 4 hex-digits to follow it
 664             w.Write([]byte{'\\', 'u'})
 665             if err := copyHex(w, 4, jr); err != nil {
 666                 return jr.improveError(err)
 667             }
 668 
 669         case 'x':
 670             // JSON only supports 4 escaped hex-digits, so pad the 2
 671             // expected hex-digits with 2 zeros
 672             w.Write([]byte{'\\', 'u', '0', '0'})
 673             if err := copyHex(w, 2, jr); err != nil {
 674                 return jr.improveError(err)
 675             }
 676 
 677         case 't', 'f', 'r', 'n', 'b', '\\', '"':
 678             // handle valid-JSON escaped string sequences
 679             w.WriteByte('\\')
 680             w.WriteByte(b)
 681 
 682         // case '\'':
 683         //  // escaped single-quotes aren't standard JSON, but they can
 684         //  // be handy when the input uses non-standard single-quoted
 685         //  // strings
 686         //  w.WriteByte('\'')
 687 
 688         default:
 689             // return jr.decorateError(unexpectedByte{b})
 690             w.Write(escapedStringBytes[b])
 691         }
 692     }
 693 }
 694 
 695 // copyHex handles a run of hex-digits for func handleString, starting right
 696 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its
 697 // errors with position info: that's up to the caller
 698 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
 699     for i := 0; i < n; i++ {
 700         b, err := jr.r.ReadByte()
 701         if err == io.EOF {
 702             return errStringEarlyEnd
 703         }
 704         if err != nil {
 705             return err
 706         }
 707 
 708         jr.updatePosInfo(b)
 709 
 710         if b := matchHex[b]; b != 0 {
 711             w.WriteByte(b)
 712             continue
 713         }
 714 
 715         return errInvalidHex
 716     }
 717 
 718     return nil
 719 }
 720 
 721 // handleValue is a generic JSON-token handler, which allows the recursive
 722 // behavior to handle any kind of JSON/pseudo-JSON input
 723 func handleValue(w *bufio.Writer, jr *jsonReader) error {
 724     chunk, err := jr.r.Peek(1)
 725     if err == nil && len(chunk) >= 1 {
 726         return handleValueDispatch(w, jr, chunk[0])
 727     }
 728 
 729     if err == io.EOF {
 730         return jr.improveError(errInputEarlyEnd)
 731     }
 732     return jr.improveError(errInputEarlyEnd)
 733 }
 734 
 735 // handleValueDispatch simplifies control-flow for func handleValue
 736 func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
 737     switch b {
 738     case 'f':
 739         return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'})
 740     case 'n':
 741         return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'})
 742     case 't':
 743         return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'})
 744     case '.':
 745         return handleDot(w, jr)
 746     case '+':
 747         return handlePositive(w, jr)
 748     case '-':
 749         return handleNegative(w, jr)
 750     case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 751         return handleNumber(w, jr)
 752     case '\'', '"':
 753         return handleString(w, jr)
 754     case '[':
 755         return handleArray(w, jr)
 756     case '{':
 757         return handleObject(w, jr)
 758     default:
 759         return jr.improveError(errInvalidToken)
 760     }
 761 }

     File: j0/json_test.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "bytes"
   6     "strings"
   7     "testing"
   8 )
   9 
  10 func TestJSON0(t *testing.T) {
  11     var tests = []struct {
  12         Input    string
  13         Expected string
  14     }{
  15         {`false`, `false`},
  16         {`null`, `null`},
  17         {`  true  `, `true`},
  18 
  19         {`0`, `0`},
  20         {`1`, `1`},
  21         {`2`, `2`},
  22         {`3`, `3`},
  23         {`4`, `4`},
  24         {`5`, `5`},
  25         {`6`, `6`},
  26         {`7`, `7`},
  27         {`8`, `8`},
  28         {`9`, `9`},
  29 
  30         {`  .345`, `0.345`},
  31         {` -.345`, `-0.345`},
  32         {` +.345`, `0.345`},
  33         {` +123.345`, `123.345`},
  34         {` +.345`, `0.345`},
  35         {` 123.34523`, `123.34523`},
  36         {` 123.34_523`, `123.34523`},
  37         {` 123_456.123`, `123456.123`},
  38 
  39         {`""`, `""`},
  40         {`''`, `""`},
  41         {`"\""`, `"\""`},
  42         {`'\"'`, `"\""`},
  43         {`'\''`, `"'"`},
  44         {`'abc\u0e9A'`, `"abc\u0E9A"`},
  45         {`'abc\x1f[0m'`, `"abc\u001F[0m"`},
  46 
  47         {`[  ]`, `[]`},
  48         {`[ , ]`, `[]`},
  49         {`[.345, false,null , ]`, `[0.345,false,null]`},
  50 
  51         {`{  }`, `{}`},
  52         {`{ , }`, `{}`},
  53 
  54         {
  55             `{ 'abc': .345, "def"  : false, 'xyz':null , }`,
  56             `{"abc":0.345,"def":false,"xyz":null}`,
  57         },
  58 
  59         {`{0problems:123,}`, `{"0problems":123}`},
  60         {`{0_problems:123}`, `{"0_problems":123}`},
  61     }
  62 
  63     for _, tc := range tests {
  64         t.Run(tc.Input, func(t *testing.T) {
  65             var out strings.Builder
  66             w := bufio.NewWriter(&out)
  67             r := bufio.NewReader(strings.NewReader(tc.Input))
  68             if err := json0(w, r); isActualError(err) {
  69                 t.Fatal(err)
  70                 return
  71             }
  72             // don't forget to flush the buffer, or output will be empty
  73             w.Flush()
  74 
  75             s := out.String()
  76             if s != tc.Expected {
  77                 t.Fatalf("<got>\n%s\n<expected>\n%s", s, tc.Expected)
  78                 return
  79             }
  80         })
  81     }
  82 }
  83 
  84 func TestEscapedStringBytes(t *testing.T) {
  85     var escaped = map[rune][]byte{
  86         '\x00': {'\\', 'u', '0', '0', '0', '0'},
  87         '\x01': {'\\', 'u', '0', '0', '0', '1'},
  88         '\x02': {'\\', 'u', '0', '0', '0', '2'},
  89         '\x03': {'\\', 'u', '0', '0', '0', '3'},
  90         '\x04': {'\\', 'u', '0', '0', '0', '4'},
  91         '\x05': {'\\', 'u', '0', '0', '0', '5'},
  92         '\x06': {'\\', 'u', '0', '0', '0', '6'},
  93         '\x07': {'\\', 'u', '0', '0', '0', '7'},
  94         '\x0b': {'\\', 'u', '0', '0', '0', 'b'},
  95         '\x0e': {'\\', 'u', '0', '0', '0', 'e'},
  96         '\x0f': {'\\', 'u', '0', '0', '0', 'f'},
  97         '\x10': {'\\', 'u', '0', '0', '1', '0'},
  98         '\x11': {'\\', 'u', '0', '0', '1', '1'},
  99         '\x12': {'\\', 'u', '0', '0', '1', '2'},
 100         '\x13': {'\\', 'u', '0', '0', '1', '3'},
 101         '\x14': {'\\', 'u', '0', '0', '1', '4'},
 102         '\x15': {'\\', 'u', '0', '0', '1', '5'},
 103         '\x16': {'\\', 'u', '0', '0', '1', '6'},
 104         '\x17': {'\\', 'u', '0', '0', '1', '7'},
 105         '\x18': {'\\', 'u', '0', '0', '1', '8'},
 106         '\x19': {'\\', 'u', '0', '0', '1', '9'},
 107         '\x1a': {'\\', 'u', '0', '0', '1', 'a'},
 108         '\x1b': {'\\', 'u', '0', '0', '1', 'b'},
 109         '\x1c': {'\\', 'u', '0', '0', '1', 'c'},
 110         '\x1d': {'\\', 'u', '0', '0', '1', 'd'},
 111         '\x1e': {'\\', 'u', '0', '0', '1', 'e'},
 112         '\x1f': {'\\', 'u', '0', '0', '1', 'f'},
 113 
 114         '\t': {'\\', 't'},
 115         '\f': {'\\', 'f'},
 116         '\b': {'\\', 'b'},
 117         '\r': {'\\', 'r'},
 118         '\n': {'\\', 'n'},
 119         '\\': {'\\', '\\'},
 120         '"':  {'\\', '"'},
 121     }
 122 
 123     if n := len(escapedStringBytes); n != 256 {
 124         t.Fatalf(`expected 256 entries, instead of %d`, n)
 125         return
 126     }
 127 
 128     for i, v := range escapedStringBytes {
 129         exp := []byte{byte(i)}
 130         if esc, ok := escaped[rune(i)]; ok {
 131             exp = esc
 132         }
 133 
 134         if !bytes.Equal(v, exp) {
 135             t.Fatalf("%d: expected %#v, got %#v", i, exp, v)
 136             return
 137         }
 138     }
 139 }

     File: j0/main.go
   1 package main
   2 
   3 import (
   4     "bufio"
   5     "errors"
   6     "io"
   7     "os"
   8 
   9     _ "embed"
  10 )
  11 
  12 // Note: the code is avoiding using the fmt package to save hundreds of
  13 // kilobytes on the resulting executable, which is a noticeable difference.
  14 
// info is the text embedded from file info.txt; func main writes it to
// stderr when the first command-line argument is one of the help options
//
//go:embed info.txt
var info string
  17 
  18 func main() {
  19     if len(os.Args) > 1 {
  20         switch os.Args[1] {
  21         case `-h`, `--h`, `-help`, `--help`:
  22             os.Stderr.WriteString(info)
  23             return
  24         }
  25     }
  26 
  27     if len(os.Args) > 2 {
  28         const msg = "\x1b[31mmultiple inputs not allowed\x1b[0m\n"
  29         os.Stderr.WriteString(msg)
  30         os.Exit(1)
  31     }
  32 
  33     // figure out whether input should come from a named file or from stdin
  34     path := `-`
  35     if len(os.Args) > 1 {
  36         path = os.Args[1]
  37     }
  38 
  39     if err := handleInput(os.Stdout, path); isActualError(err) {
  40         os.Stderr.WriteString("\x1b[31m")
  41         os.Stderr.WriteString(err.Error())
  42         os.Stderr.WriteString("\x1b[0m\n")
  43         os.Exit(1)
  44     }
  45 }
  46 
  47 // handleInput simplifies control-flow for func main
  48 func handleInput(w io.Writer, path string) error {
  49     // pro, _ := os.Create(`j0.prof`)
  50     // defer pro.Close()
  51     // pprof.StartCPUProfile(pro)
  52     // defer pprof.StopCPUProfile()
  53 
  54     if path == `-` {
  55         return convert(w, os.Stdin)
  56     }
  57 
  58     // if f := strings.HasPrefix; f(path, `https://`) || f(path, `http://`) {
  59     //  resp, err := http.Get(path)
  60     //  if err != nil {
  61     //      return err
  62     //  }
  63     //  defer resp.Body.Close()
  64     //  return convert(w, resp.Body)
  65     // }
  66 
  67     f, err := os.Open(path)
  68     if err != nil {
  69         // on windows, file-not-found error messages may mention `CreateFile`,
  70         // even when trying to open files in read-only mode
  71         return errors.New(`can't open file named ` + path)
  72     }
  73     defer f.Close()
  74     return convert(w, f)
  75 }
  76 
  77 // convert simplifies control-flow for func handleInput
  78 func convert(w io.Writer, r io.Reader) error {
  79     const bufSize = 16 * 1024
  80     bw := bufio.NewWriterSize(w, bufSize)
  81     br := bufio.NewReaderSize(r, bufSize)
  82     defer bw.Flush()
  83 
  84     err := json0(bw, br)
  85     // end the only output-line with a line-feed; this also avoids showing
  86     // error messages on the same line as the main output, since JSON-0
  87     // output has no line-feeds before its last byte
  88     bw.WriteByte('\n')
  89     return err
  90 }

     File: j0/mit-license.txt
   1 The MIT License (MIT)
   2 
   3 Copyright © 2024 pacman64
   4 
   5 Permission is hereby granted, free of charge, to any person obtaining a copy of
   6 this software and associated documentation files (the “Software”), to deal
   7 in the Software without restriction, including without limitation the rights to
   8 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   9 of the Software, and to permit persons to whom the Software is furnished to do
  10 so, subject to the following conditions:
  11 
  12 The above copyright notice and this permission notice shall be included in all
  13 copies or substantial portions of the Software.
  14 
  15 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 SOFTWARE.