// File: json0.go

/*
The MIT License (MIT)

Copyright © 2026 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for json0: this version has no http(s) support. Even
the unit-tests from the original json0 are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath json0.go
*/

package main

import (
	"bufio"
	"bytes"
	"errors"
	"io"
	"os"
	"strconv"
	"unicode"
)

// info is the help message: its leading line-feed is skipped when shown
const info = `
json0 [options...] [file...]


JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -h        show this help message
    -help     show this help message
    -jsonl    emit JSON Lines, when top-level value is an array
`

const (
	// bufSize is the size of both the input and the output buffers
	bufSize = 32 * 1024
)

// main handles the command-line options, then converts the only input
// given, which is the standard input when no file name is given
func main() {
	args := os.Args[1:]
	var handler handlerFunc = json0

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stdout.WriteString(info[1:])
			return
		case `-jsonl`, `--jsonl`:
			handler = jsonl
			args = args[1:]
		}
	}

	if len(args) > 1 {
		const msg = "only 1 (optional) named input is supported\n"
		os.Stderr.WriteString(msg)
		os.Exit(1)
	}

	name := `-`
	if len(args) == 1 {
		name = args[0]
	}

	if err := run(os.Stdout, name, handler); isActualError(err) {
		os.Stderr.WriteString(err.Error())
		os.Stderr.WriteString("\n")
		os.Exit(1)
	}
}

// handlerFunc is the kind of func which converts a whole input stream
type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error

// run gives the handler buffered access to the output given and to the
// input named: an empty name or a single dash mean the standard input
func run(w io.Writer, name string, handler handlerFunc) error {
	var src io.Reader = os.Stdin
	if name != `` && name != `-` {
		f, err := os.Open(name)
		if err != nil {
			return errors.New(`can't read from file named "` + name + `"`)
		}
		defer f.Close()
		src = f
	}

	bw := bufio.NewWriterSize(w, bufSize)
	// flush errors are deliberately ignored: when the output can't take
	// any more data, the app is meant to quit quietly
	defer bw.Flush()
	return handler(bw, bufio.NewReaderSize(src, bufSize))
}

var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidRune     = errors.New(`invalid UTF-8 bytes`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoObjectKey     = errors.New(`expected an object key`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)

	// errNoMoreOutput is a generic dummy output-error, which is meant to be
	// ultimately ignored, being just an excuse to quit the app immediately
	// and successfully
	errNoMoreOutput = errors.New(`no more output`)
)

// isActualError is to figure out whether not to ignore an error, and thus
// show it as an error message
func isActualError(err error) bool {
	return err != nil && err != io.EOF && err != errNoMoreOutput
}

// isEndOfInput checks if an error means the input is over, whether it's
// still a plain io.EOF, or it was already turned into a linePosError by
// the jsonReader methods readByte/readRune
func isEndOfInput(err error) bool {
	if err == io.EOF {
		return true
	}
	lpe, ok := err.(linePosError)
	return ok && lpe.err == errInputEarlyEnd
}

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// isIdentifier improves control-flow of func handleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON
func json0(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	return handleTopLevelValue(w, &jr)
}

// jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines: each item
// from a top-level array is emitted on its own line, while any other kind of
// top-level value is emitted on a single line, the same way func json0 does
func jsonl(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}

	if b, ok := jr.peekByte(); !ok || b != '[' {
		return handleTopLevelValue(w, &jr)
	}

	if err := handleArrayJSONL(w, &jr); err != nil {
		return err
	}
	// anything but whitespace/comments after the array is still an error
	return jr.handleTrailingJunk()
}

// handleTopLevelValue handles the only top-level value for funcs json0 and
// jsonl, along with anything which may follow it
func handleTopLevelValue(w *bufio.Writer, jr *jsonReader) error {
	err := handleValue(w, jr)

	// end the only output-line with a line-feed; this also avoids showing
	// error messages on the same line as the main output, since JSON-0
	// output has no line-feeds before its last byte
	outputByte(w, '\n')

	if err != nil {
		return err
	}
	return jr.handleTrailingJunk()
}

// handleArrayJSONL handles top-level arrays for func jsonl, emitting each
// item on its own line, and flushing the output after each line
func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}

	for n := 0; ; n++ {
		comma, done, err := jr.seekItem(']')
		if err != nil {
			return err
		}

		// handle end of array: empty arrays result in no lines at all
		if done {
			if n == 0 {
				return nil
			}
			err = outputByte(w, '\n')
			w.Flush()
			return err
		}

		// turn commas between adjacent values into line-feeds, as the
		// output for this custom func is supposed to be JSON Lines
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err = outputByte(w, '\n'); err != nil {
				return err
			}
			w.Flush()
		}

		if err = handleValue(w, jr); err != nil {
			return err
		}
	}
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader

	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int

	// pos is the 1-based `horizontal` position in its line, and gives
	// errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair;
// nil errors stay nil, and errors already having position info are kept
func (jr *jsonReader) improveError(err error) error {
	if err == nil {
		return nil
	}
	if _, ok := err.(linePosError); ok {
		return err
	}
	if err == io.EOF {
		err = errInputEarlyEnd
	}
	return linePosError{line: jr.line, pos: jr.pos, err: err}
}

// handleLeadingJunk skips anything which can come before the top-level value
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()

	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk skips anything which can come after the top-level
// value, failing when anything else is found
func (jr *jsonReader) handleTrailingJunk() error {
	// ignore trailing whitespace and/or comments
	if err := jr.seekNext(); err != nil {
		return err
	}

	// ignore trailing semicolon
	if b, ok := jr.peekByte(); ok && b == ';' {
		jr.readByte()
		// ignore trailing whitespace and/or comments
		if err := jr.seekNext(); err != nil {
			return err
		}
	}

	// beyond trailing whitespace and/or comments, any more bytes
	// make the whole input data invalid JSON
	if _, ok := jr.peekByte(); ok {
		return jr.improveError(errExtraBytes)
	}
	return nil
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err == io.EOF {
		return jr.improveError(errInputEarlyEnd)
	}
	if err != nil {
		return jr.improveError(err)
	}

	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}

	jr.readByte()
	return nil
}

// updatePosInfo does what it says, given the rune just read separately
func (jr *jsonReader) updatePosInfo(r rune) {
	if r == '\n' {
		jr.line++
		jr.pos = 1
		return
	}
	jr.pos++
}

// peekByte simplifies control-flow for various other funcs: the byte isn't
// consumed, and ok is false when there's no byte to get, whatever the reason
func (jr *jsonReader) peekByte() (b byte, ok bool) {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return chunk[0], true
	}
	return 0, false
}

// readByte does what it says, updating the reader's position info; all its
// errors have position info, which means the end of the input isn't a plain
// io.EOF for its callers: func isEndOfInput can check for that
func (jr *jsonReader) readByte() (b byte, err error) {
	b, err = jr.r.ReadByte()
	if err == nil {
		jr.updatePosInfo(rune(b))
		return b, nil
	}
	return b, jr.improveError(err)
}

// readRune does what it says, updating the reader's position info; all its
// errors have position info, just like with func readByte, and invalid UTF-8
// input results in an error
func (jr *jsonReader) readRune() (rune, error) {
	r, size, err := jr.r.ReadRune()
	if err != nil {
		return r, jr.improveError(err)
	}

	// a replacement-char which is 1-byte wide stands for invalid UTF-8,
	// while wider ones are actual valid replacement-chars from the input
	if r == unicode.ReplacementChar && size == 1 {
		return r, jr.improveError(errInvalidRune)
	}

	jr.updatePosInfo(r)
	return r, nil
}

// seekNext skips/seeks the next token, ignoring runs of whitespace symbols
// and comments, either single-line (starting with // or with #) or general
// (starting with /* and ending with */)
func (jr *jsonReader) seekNext() error {
	for {
		b, ok := jr.peekByte()
		if !ok {
			return nil
		}

		switch {
		case b <= 32:
			// keep skipping whitespace bytes, which are handled as if
			// they were any of ' ', '\t', '\f', '\v', '\r', '\n'
			jr.readByte()

		case b == '#':
			if err := jr.skipLine(); err != nil {
				return err
			}

		case b == '/':
			// after comments, keep looking for more whitespace and/or
			// comments
			if err := jr.skipComment(); err != nil {
				return err
			}

		default:
			// reached the next token
			return nil
		}
	}
}

// seekItem helps funcs handle the items in arrays and objects: it skips any
// whitespace/comments, along with up to 1 comma among those, reporting if a
// comma was found; it also reports if the end of the array/object was found
// right after all that, which is when the end byte given is also skipped
func (jr *jsonReader) seekItem(end byte) (comma, done bool, err error) {
	// there may be whitespace/comments before the next comma
	if err = jr.seekNext(); err != nil {
		return false, false, err
	}

	// handle commas between items, as well as trailing ones
	b, _ := jr.peekByte()
	if b == ',' {
		jr.readByte()
		comma = true

		// there may be whitespace/comments before the ending byte
		if err = jr.seekNext(); err != nil {
			return false, false, err
		}
		b, _ = jr.peekByte()
	}

	if b == end {
		jr.readByte()
		return comma, true, nil
	}
	return comma, false, nil
}

// skipComment helps func seekNext skip over comments, simplifying the latter
// func's control-flow
func (jr *jsonReader) skipComment() error {
	if err := jr.demandSyntax('/'); err != nil {
		return err
	}

	// a slash right at the end of the input doesn't start a comment
	b, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errInvalidComment)
	}

	switch b {
	case '/':
		// handle single-line comments
		return jr.skipLine()

	case '*':
		// handle (potentially) multi-line comments
		return jr.skipGeneralComment()

	default:
		return jr.improveError(errInvalidComment)
	}
}

// skipLine handles single-line comments for funcs seekNext and skipComment,
// by skipping all bytes up to the next line-feed, if any
func (jr *jsonReader) skipLine() error {
	for {
		b, err := jr.readByte()
		if isEndOfInput(err) {
			// end of input is fine in this case
			return nil
		}
		if err != nil {
			return err
		}

		if b == '\n' {
			return nil
		}
	}
}

// skipGeneralComment handles (potentially) multi-line comments for func
// skipComment, which calls this func right after skipping the leading slash
func (jr *jsonReader) skipGeneralComment() error {
	// skip the asterisk which starts the comment, so that it can't also
	// be part of what ends it: `/*/` isn't a complete comment
	if b, ok := jr.peekByte(); ok && b == '*' {
		jr.readByte()
	}

	var prev byte
	for {
		// func readByte already keeps track of lines
		b, err := jr.readByte()
		if err != nil {
			return jr.improveError(errCommentEarlyEnd)
		}

		if prev == '*' && b == '/' {
			return nil
		}
		prev = b
	}
}

// skipUTF8BOM does what it says, if a UTF-8 BOM is present; the position
// info isn't changed, so the BOM doesn't count as part of the first line
func (jr *jsonReader) skipUTF8BOM() {
	lead, err := jr.r.Peek(3)
	if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
		jr.r.Discard(3)
	}
}

// outputByte is a small wrapper on func WriteByte, which adapts any error
// into a custom dummy output-error, which is in turn meant to be ignored,
// being just an excuse to quit the app immediately and successfully
func outputByte(w *bufio.Writer, b byte) error {
	if err := w.WriteByte(b); err != nil {
		return errNoMoreOutput
	}
	return nil
}

// handleArray handles arrays for func handleValue
func handleArray(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}
	w.WriteByte('[')

	for n := 0; ; n++ {
		comma, done, err := jr.seekItem(']')
		if err != nil {
			return err
		}

		// handle end of array
		if done {
			w.WriteByte(']')
			return nil
		}

		// don't forget commas between adjacent values
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err = outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next value
		if err = handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handleDigits helps various number-handling funcs do their job: it fails
// unless it finds at least 1 actual digit
func handleDigits(w *bufio.Writer, jr *jsonReader) error {
	digits := 0

	for {
		b, _ := jr.peekByte()

		switch {
		case b == '_':
			// support `nice` long numbers by ignoring their underscores,
			// which don't count as digits
			jr.readByte()

		case '0' <= b && b <= '9':
			jr.readByte()
			w.WriteByte(b)
			digits++

		case digits == 0:
			return jr.improveError(errNoDigits)

		default:
			return nil
		}
	}
}

// handleDot handles pseudo-JSON numbers which start with a decimal dot, by
// emitting a leading 0 before the dot
func handleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}
	w.Write([]byte{'0', '.'})

	if err := handleDigits(w, jr); err != nil {
		return err
	}
	return handleExponent(w, jr)
}

// handleExponent handles the optional exponent which can end numbers
func handleExponent(w *bufio.Writer, jr *jsonReader) error {
	b, _ := jr.peekByte()
	if b != 'e' && b != 'E' {
		return nil
	}
	jr.readByte()
	w.WriteByte(b)

	// positive signs are valid in exponents, but are also redundant
	switch sign, _ := jr.peekByte(); sign {
	case '+':
		jr.readByte()
	case '-':
		jr.readByte()
		w.WriteByte('-')
	}
	return handleDigits(w, jr)
}

// handleKey is used by func handleObject and generalizes func handleString,
// by allowing unquoted object keys; it's not used anywhere else, as allowing
// unquoted string values is ambiguous with actual JSON-keyword values null,
// false, and true.
func handleKey(w *bufio.Writer, jr *jsonReader) error {
	first, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errStringEarlyEnd)
	}

	if first == '"' || first == '\'' {
		return handleString(w, jr)
	}

	// unquoted keys can't be empty
	if !isIdentifier[first] {
		return jr.improveError(errNoObjectKey)
	}

	w.WriteByte('"')
	for {
		b, _ := jr.peekByte()
		if !isIdentifier[b] {
			break
		}
		jr.readByte()
		w.WriteByte(b)
	}
	w.WriteByte('"')
	return nil
}

// trySimpleInner tries to handle (more quickly) inner-strings where all bytes
// are unescaped ASCII symbols: this is a very common case for strings, and is
// almost always the case for object keys; returns whether it succeeded, so
// this func's caller knows if it needs to do anything, the slower way
func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
	chunk, _ := jr.r.Peek(64)

	for i, b := range chunk {
		if b == quote {
			// bulk-writing the chunk is this func's whole point
			w.WriteByte('"')
			w.Write(chunk[:i])
			w.WriteByte('"')

			// the bytes skipped can't include line-feeds
			jr.r.Discard(i + 1)
			jr.pos += i + 1
			return true
		}

		// double-quotes inside single-quoted strings need escaping, so
		// those aren't simple cases either
		if b < 32 || b > 127 || b == '\\' || b == '"' {
			return false
		}
	}

	// maybe the inner-string is ok, but it's just longer than the chunk
	return false
}

// handleKeyword is used by func handleValueDispatch to handle the keywords
// which are valid JSON
func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
	return handleKeywordAs(w, jr, kw, kw)
}

// handleKeywordAs demands the keyword given from the input, and emits the
// output given instead: this allows turning pseudo-JSON keywords into the
// valid-JSON ones
func handleKeywordAs(w *bufio.Writer, jr *jsonReader, kw, out []byte) error {
	for _, want := range kw {
		// func readByte already keeps track of the input position
		if b, err := jr.readByte(); err != nil || b != want {
			msg := `expected JSON value ` + string(kw)
			return jr.improveError(errors.New(msg))
		}
	}

	w.Write(out)
	return nil
}

// handleNegative handles numbers starting with a negative sign for func
// handleValue
func handleNegative(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('-'); err != nil {
		return err
	}
	w.WriteByte('-')
	return handleUnsigned(w, jr)
}

// handleUnsigned handles what follows the leading sign of a number
func handleUnsigned(w *bufio.Writer, jr *jsonReader) error {
	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}
	return handleNumber(w, jr)
}

// handleNumber handles numeric values/tokens which start with a digit: these
// can have decimals, an exponent, or even both
func handleNumber(w *bufio.Writer, jr *jsonReader) error {
	// handle integer digits
	if err := handleDigits(w, jr); err != nil {
		return err
	}

	// handle optional decimal digits, starting with a leading dot
	if b, _ := jr.peekByte(); b == '.' {
		jr.readByte()
		w.WriteByte('.')
		if err := handleDigits(w, jr); err != nil {
			return err
		}
	}

	// handle optional exponent digits
	return handleExponent(w, jr)
}

// handleObject handles objects for func handleValue
func handleObject(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('{'); err != nil {
		return err
	}
	w.WriteByte('{')

	for npairs := 0; ; npairs++ {
		comma, done, err := jr.seekItem('}')
		if err != nil {
			return err
		}

		// handle end of object
		if done {
			w.WriteByte('}')
			return nil
		}

		// don't forget commas between adjacent key-value pairs
		if npairs > 0 {
			if !comma {
				return jr.improveError(errNoObjectComma)
			}
			if err = outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next pair's key
		if err = handleKey(w, jr); err != nil {
			return err
		}

		// demand a colon right after the key
		if err = jr.seekNext(); err != nil {
			return err
		}
		if err = jr.demandSyntax(':'); err != nil {
			return err
		}
		w.WriteByte(':')

		// handle the next pair's value
		if err = jr.seekNext(); err != nil {
			return err
		}
		if err = handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handlePositive handles numbers starting with a positive sign for func
// handleValue
func handlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}

	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers
	return handleUnsigned(w, jr)
}

// handleString handles strings for funcs handleValue and handleKey, and
// supports both single-quotes and double-quotes, always emitting the latter
// in the output, of course
func handleString(w *bufio.Writer, jr *jsonReader) error {
	q, ok := jr.peekByte()
	if !ok || (q != '"' && q != '\'') {
		return jr.improveError(errNoStringQuote)
	}
	jr.readByte()

	// try the quicker all-unescaped-ASCII handler
	if trySimpleInner(w, jr, q) {
		return nil
	}

	// it's a non-trivial inner-string, so handle it rune-by-rune
	w.WriteByte('"')
	quote := rune(q)
	escaped := false

	for {
		r, err := jr.readRune()
		if err != nil {
			if isEndOfInput(err) {
				return jr.improveError(errStringEarlyEnd)
			}
			return err
		}

		if !escaped {
			if r == '\\' {
				escaped = true
				continue
			}

			// handle end of string
			if r == quote {
				return outputByte(w, '"')
			}

			writeStringRune(w, r)
			continue
		}

		// handle escaped items
		escaped = false

		switch r {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := copyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}

		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := copyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}

		case 't', 'f', 'r', 'n', 'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(byte(r))

		case '\'':
			// escaped single-quotes aren't standard JSON, but they can
			// be handy when the input uses non-standard single-quoted
			// strings
			w.WriteByte('\'')

		default:
			// anything else is emitted as if it weren't escaped
			writeStringRune(w, r)
		}
	}
}

// writeStringRune emits a rune from inside a string for func handleString,
// escaping it when JSON requires that
func writeStringRune(w *bufio.Writer, r rune) {
	if r <= 127 {
		w.Write(escapedStringBytes[byte(r)])
		return
	}
	w.WriteRune(r)
}

// copyHex handles a run of hex-digits for func handleString, starting right
// after the leading `\u` (or `\x`) part; this func doesn't `improve` its
// own errors with position info: that's up to the caller
func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		b, err := jr.readByte()
		if isEndOfInput(err) {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}

		// non-hex bytes, which include all non-ASCII ones, match to 0
		digit := matchHex[b]
		if digit == 0 {
			return errInvalidHex
		}
		w.WriteByte(digit)
	}

	return nil
}

// handleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func handleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err == nil && len(chunk) >= 1 {
		return handleValueDispatch(w, jr, chunk[0])
	}

	if err != nil && err != io.EOF {
		return jr.improveError(err)
	}
	return jr.improveError(errInputEarlyEnd)
}

// handleValueDispatch simplifies control-flow for func handleValue: the
// byte given is the next one from the input, which isn't consumed yet
func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case 'f':
		return handleKeyword(w, jr, []byte(`false`))
	case 'n':
		return handleKeyword(w, jr, []byte(`null`))
	case 't':
		return handleKeyword(w, jr, []byte(`true`))
	case 'F':
		// capitalized keywords aren't valid JSON: emit the valid ones
		return handleKeywordAs(w, jr, []byte(`False`), []byte(`false`))
	case 'N':
		return handleKeywordAs(w, jr, []byte(`None`), []byte(`null`))
	case 'T':
		return handleKeywordAs(w, jr, []byte(`True`), []byte(`true`))
	case '.':
		return handleDot(w, jr)
	case '+':
		return handlePositive(w, jr)
	case '-':
		return handleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return handleNumber(w, jr)
	case '\'', '"':
		return handleString(w, jr)
	case '[':
		return handleArray(w, jr)
	case '{':
		return handleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = newEscapeTable()

// newEscapeTable makes the lookup table for escapedStringBytes: all bytes
// stand for themselves, except for double-quotes, backslashes, and control
// bytes below 32, which become their escape sequences
func newEscapeTable() [256][]byte {
	const hex = `0123456789abcdef`

	var table [256][]byte
	for i := range table {
		b := byte(i)
		if b < 32 {
			table[i] = []byte{'\\', 'u', '0', '0', hex[b>>4], hex[b&0x0f]}
			continue
		}
		table[i] = []byte{b}
	}

	// some bytes have shorter dedicated escapes
	table['\b'] = []byte{'\\', 'b'}
	table['\t'] = []byte{'\\', 't'}
	table['\n'] = []byte{'\\', 'n'}
	table['\f'] = []byte{'\\', 'f'}
	table['\r'] = []byte{'\\', 'r'}
	table['"'] = []byte{'\\', '"'}
	table['\\'] = []byte{'\\', '\\'}
	return table
}