File: json0.go 1 /* 2 The MIT License (MIT) 3 4 Copyright (c) 2026 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the "Software"), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for json0: this version has no http(s) support. Even 27 the unit-tests from the original json0 are omitted. 28 29 To compile a smaller-sized command-line app, you can use the `go` command as 30 follows: 31 32 go build -ldflags "-s -w" -trimpath json0.go 33 */ 34 35 package main 36 37 import ( 38 "bufio" 39 "errors" 40 "io" 41 "os" 42 "strconv" 43 "unicode" 44 ) 45 46 const info = ` 47 json0 [options...] [file...] 48 49 50 JSON-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output. 51 Its output is always a single line, which ends with a line-feed. 52 53 Besides minimizing bytes, this tool also adapts almost-JSON input into 54 valid JSON, since it 55 56 - ignores both rest-of-line and multi-line comments 57 - ignores extra/trailing commas in arrays and objects 58 - turns single-quoted strings/keys into double-quoted strings 59 - double-quotes unquoted object keys 60 - changes \x 2-hex-digit into \u 4-hex-digit string-escapes 61 62 All options available can either start with a single or a double-dash 63 64 -h, -help show this help message 65 -jsonl emit JSON Lines, when top-level value is an array 66 ` 67 68 const ( 69 bufSize = 32 * 1024 70 chunkPeekSize = 16 71 ) 72 73 func main() { 74 args := os.Args[1:] 75 buffered := false 76 handler := json0 77 78 for len(args) > 0 { 79 switch args[0] { 80 case `-b`, `--b`, `-buffered`, `--buffered`: 81 buffered = true 82 args = args[1:] 83 continue 84 85 case `-h`, `--h`, `-help`, `--help`: 86 os.Stdout.WriteString(info[1:]) 87 return 88 89 case `-jsonl`, `--jsonl`: 90 handler = jsonl 91 args = args[1:] 92 continue 93 } 94 95 break 96 } 97 98 if len(args) > 0 && args[0] == `--` { 99 args = args[1:] 100 } 101 102 if len(args) > 1 { 103 const msg = "multiple inputs aren't allowed\n" 104 os.Stderr.WriteString(msg) 105 os.Exit(1) 106 } 107 108 liveLines := !buffered 109 if !buffered { 110 if _, err := os.Stdout.Seek(0, io.SeekCurrent); err == nil { 111 liveLines = false 112 } 113 } 114 115 name := `-` 116 if len(args) == 1 { 117 name = args[0] 118 } 119 120 if err := run(os.Stdout, name, handler, liveLines); err != nil && err != io.EOF { 121 os.Stderr.WriteString(err.Error()) 122 os.Stderr.WriteString("\n") 123 os.Exit(1) 124 } 125 } 126 127 type handlerFunc func(w *bufio.Writer, r *bufio.Reader, live bool) error 128 129 func run(w io.Writer, name string, handler handlerFunc, live bool) error { 130 // f, _ := os.Create(`json0.prof`) 131 // defer f.Close() 132 // pprof.StartCPUProfile(f) 133 // defer pprof.StopCPUProfile() 134 135 if name == `` || name == `-` { 136 bw := bufio.NewWriterSize(w, bufSize) 137 br := bufio.NewReaderSize(os.Stdin, bufSize) 138 defer bw.Flush() 139 return handler(bw, br, live) 140 } 141 142 f, err := os.Open(name) 143 if err != nil { 144 return errors.New(`can't read from file named "` + name + `"`) 145 } 146 defer f.Close() 147 148 bw := bufio.NewWriterSize(w, bufSize) 149 br := bufio.NewReaderSize(f, bufSize) 150 defer bw.Flush() 151 return handler(bw, br, live) 152 } 153 154 var ( 155 errCommentEarlyEnd = errors.New(`unexpected early-end of comment`) 156 errInputEarlyEnd = errors.New(`expected end of input data`) 157 errInvalidComment = errors.New(`expected / or *`) 158 errInvalidHex = errors.New(`expected a base-16 digit`) 159 errInvalidRune = errors.New(`invalid UTF-8 bytes`) 160 errInvalidToken = errors.New(`invalid JSON token`) 161 errNoDigits = errors.New(`expected numeric digits`) 162 errNoStringQuote = errors.New(`expected " or '`) 163 errNoArrayComma = errors.New(`missing comma between array values`) 164 errNoObjectComma = errors.New(`missing comma between key-value pairs`) 165 errStringEarlyEnd = errors.New(`unexpected early-end of string`) 166 errExtraBytes = errors.New(`unexpected extra input bytes`) 167 ) 168 169 // linePosError is a more descriptive kind of error, showing the source of 170 // the input-related problem, as 1-based a line/pos number pair in front 171 // of the error message 172 type linePosError struct { 173 // line is the 1-based line count from the input 174 line int 175 176 // pos is the 1-based `horizontal` position in its line 177 pos int 178 179 // err is the error message to `decorate` with the position info 180 err error 181 } 182 183 // Error satisfies the error interface 184 func (lpe linePosError) Error() string { 185 where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos) 186 return where + `: ` + lpe.err.Error() 187 } 188 189 // isIdentifier improves control-flow of func handleKey, when it handles 190 // unquoted object keys 191 var isIdentifier = [256]bool{ 192 '_': true, 193 194 '0': true, '1': true, '2': true, '3': true, '4': true, 195 '5': true, '6': true, '7': true, '8': true, '9': true, 196 197 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 198 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 199 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 200 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 201 'Y': true, 'Z': true, 202 203 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 204 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 205 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 206 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 207 'y': true, 'z': true, 208 } 209 210 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not 211 // being 0, and normalizes letter-case for the hex letters 212 var matchHex = [256]byte{ 213 '0': '0', '1': '1', '2': '2', '3': '3', '4': '4', 214 '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 215 'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 216 'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F', 217 } 218 219 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON; final boolean 220 // value isn't used, and is just there to match the signature of func jsonl 221 func json0(w *bufio.Writer, r *bufio.Reader, live bool) error { 222 jr := jsonReader{r, 1, 1} 223 defer w.Flush() 224 225 if err := jr.handleLeadingJunk(); err != nil { 226 return err 227 } 228 229 // handle a single top-level JSON value 230 err := handleValue(w, &jr) 231 232 // end the only output-line with a line-feed; this also avoids showing 233 // error messages on the same line as the main output, since JSON-0 234 // output has no line-feeds before its last byte 235 outputByte(w, '\n') 236 237 if err != nil { 238 return err 239 } 240 return jr.handleTrailingJunk() 241 } 242 243 // jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func 244 // avoids writing a trailing line-feed, leaving that up to its caller 245 func jsonl(w *bufio.Writer, r *bufio.Reader, live bool) error { 246 jr := jsonReader{r, 1, 1} 247 248 if err := jr.handleLeadingJunk(); err != nil { 249 return err 250 } 251 252 chunk, err := jr.r.Peek(1) 253 if err == nil && len(chunk) >= 1 { 254 switch b := chunk[0]; b { 255 case '[', '(': 256 return handleArrayJSONL(w, &jr, b, live) 257 } 258 } 259 260 // handle a single top-level JSON value 261 err = handleValue(w, &jr) 262 263 // end the only output-line with a line-feed; this also avoids showing 264 // error messages on the same line as the main output, since JSON-0 265 // output has no line-feeds before its last byte 266 outputByte(w, '\n') 267 268 if err != nil { 269 return err 270 } 271 return jr.handleTrailingJunk() 272 } 273 274 // handleArrayJSONL handles top-level arrays for func jsonl 275 func handleArrayJSONL(w *bufio.Writer, jr *jsonReader, start byte, live bool) error { 276 if err := jr.demandSyntax(start); err != nil { 277 return err 278 } 279 280 var end byte = ']' 281 if start == '(' { 282 end = ')' 283 } 284 285 for n := 0; true; n++ { 286 // there may be whitespace/comments before the next comma 287 if err := jr.seekNext(); err != nil { 288 return err 289 } 290 291 // handle commas between values, as well as trailing ones 292 comma := false 293 b, _ := jr.peekByte() 294 if b == ',' { 295 jr.readByte() 296 comma = true 297 298 // there may be whitespace/comments before an ending ']' 299 if err := jr.seekNext(); err != nil { 300 return err 301 } 302 b, _ = jr.peekByte() 303 } 304 305 // handle end of array 306 if b == end { 307 jr.readByte() 308 if n > 0 { 309 err := outputByte(w, '\n') 310 if live { 311 w.Flush() 312 } 313 return err 314 } 315 return nil 316 } 317 318 // turn commas between adjacent values into line-feeds, as the 319 // output for this custom func is supposed to be JSON Lines 320 if n > 0 { 321 if !comma { 322 return errNoArrayComma 323 } 324 if err := outputByte(w, '\n'); err != nil { 325 return err 326 } 327 if live { 328 w.Flush() 329 } 330 } 331 332 // handle the next value 333 if err := jr.seekNext(); err != nil { 334 return err 335 } 336 if err := handleValue(w, jr); err != nil { 337 return err 338 } 339 } 340 341 // make the compiler happy 342 return nil 343 } 344 345 // jsonReader reads data via a buffer, keeping track of the input position: 346 // this in turn allows showing much more useful errors, when these happen 347 type jsonReader struct { 348 // r is the actual reader 349 r *bufio.Reader 350 351 // line is the 1-based line-counter for input bytes, and gives errors 352 // useful position info 353 line int 354 355 // pos is the 1-based `horizontal` position in its line, and gives 356 // errors useful position info 357 pos int 358 } 359 360 // improveError makes any error more useful, by giving it info about the 361 // current input-position, as a 1-based line/within-line-position pair 362 func (jr jsonReader) improveError(err error) error { 363 if _, ok := err.(linePosError); ok { 364 return err 365 } 366 367 if err == io.EOF { 368 return linePosError{jr.line, jr.pos, errInputEarlyEnd} 369 } 370 if err != nil { 371 return linePosError{jr.line, jr.pos, err} 372 } 373 return nil 374 } 375 376 func (jr *jsonReader) handleLeadingJunk() error { 377 // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order 378 // mark) gives no useful info if present, as UTF-8 leaves no ambiguity 379 // about byte-order by design 380 jr.skipUTF8BOM() 381 382 // ignore leading whitespace and/or comments 383 return jr.seekNext() 384 } 385 386 func (jr *jsonReader) handleTrailingJunk() error { 387 // ignore trailing whitespace and/or comments 388 if err := jr.seekNext(); err != nil { 389 return err 390 } 391 392 // ignore trailing semicolons 393 for { 394 if b, ok := jr.peekByte(); !ok || b != ';' { 395 break 396 } 397 398 jr.readByte() 399 // ignore trailing whitespace and/or comments 400 if err := jr.seekNext(); err != nil { 401 return err 402 } 403 } 404 405 // beyond trailing whitespace and/or comments, any more bytes 406 // make the whole input data invalid JSON 407 if _, ok := jr.peekByte(); ok { 408 return jr.improveError(errExtraBytes) 409 } 410 return nil 411 } 412 413 // demandSyntax fails with an error when the next byte isn't the one given; 414 // when it is, the byte is then read/skipped, and a nil error is returned 415 func (jr *jsonReader) demandSyntax(syntax byte) error { 416 chunk, err := jr.r.Peek(1) 417 if err == io.EOF { 418 return jr.improveError(errInputEarlyEnd) 419 } 420 if err != nil { 421 return jr.improveError(err) 422 } 423 424 if len(chunk) < 1 || chunk[0] != syntax { 425 msg := `expected ` + string(rune(syntax)) 426 return jr.improveError(errors.New(msg)) 427 } 428 429 jr.readByte() 430 return nil 431 } 432 433 // peekByte simplifies control-flow for various other funcs 434 func (jr jsonReader) peekByte() (b byte, ok bool) { 435 chunk, err := jr.r.Peek(1) 436 if err == nil && len(chunk) >= 1 { 437 return chunk[0], true 438 } 439 return 0, false 440 } 441 442 // readByte does what it says, updating the reader's position info 443 func (jr *jsonReader) readByte() (b byte, err error) { 444 b, err = jr.r.ReadByte() 445 if err == nil { 446 if b == '\n' { 447 jr.line += 1 448 jr.pos = 1 449 } else { 450 jr.pos++ 451 } 452 return b, nil 453 } 454 return b, jr.improveError(err) 455 } 456 457 // readRune does what it says, updating the reader's position info 458 func (jr *jsonReader) readRune() (r rune, err error) { 459 r, _, err = jr.r.ReadRune() 460 if err == nil { 461 if r == '\n' { 462 jr.line += 1 463 jr.pos = 1 464 } else { 465 jr.pos++ 466 } 467 return r, nil 468 } 469 return r, jr.improveError(err) 470 } 471 472 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols 473 // and comments, either single-line (starting with //) or general (starting 474 // with /* and ending with */) 475 func (jr *jsonReader) seekNext() error { 476 for { 477 b, ok := jr.peekByte() 478 if !ok { 479 return nil 480 } 481 482 // case ' ', '\t', '\f', '\v', '\r', '\n': 483 if b <= 32 { 484 // keep skipping whitespace bytes 485 jr.readByte() 486 continue 487 } 488 489 if b == '#' { 490 if err := jr.skipLine(); err != nil { 491 return err 492 } 493 continue 494 } 495 496 if b != '/' { 497 // reached the next token 498 return nil 499 } 500 501 if err := jr.skipComment(); err != nil { 502 return err 503 } 504 505 // after comments, keep looking for more whitespace and/or comments 506 } 507 } 508 509 // skipComment helps func seekNext skip over comments, simplifying the latter 510 // func's control-flow 511 func (jr *jsonReader) skipComment() error { 512 err := jr.demandSyntax('/') 513 if err != nil { 514 return err 515 } 516 517 b, ok := jr.peekByte() 518 if !ok { 519 return nil 520 } 521 522 switch b { 523 case '/': 524 // handle single-line comments 525 return jr.skipLine() 526 527 case '*': 528 // handle (potentially) multi-line comments 529 return jr.skipGeneralComment() 530 531 default: 532 return jr.improveError(errInvalidComment) 533 } 534 } 535 536 // skipLine handles single-line comments for func skipComment 537 func (jr *jsonReader) skipLine() error { 538 for { 539 b, err := jr.readByte() 540 if err == io.EOF { 541 // end of input is fine in this case 542 return nil 543 } 544 if err != nil { 545 return err 546 } 547 548 if b == '\n' { 549 return nil 550 } 551 } 552 } 553 554 // skipGeneralComment handles (potentially) multi-line comments for func 555 // skipComment 556 func (jr *jsonReader) skipGeneralComment() error { 557 var prev byte 558 for { 559 b, err := jr.readByte() 560 if err != nil { 561 return jr.improveError(errCommentEarlyEnd) 562 } 563 564 if prev == '*' && b == '/' { 565 return nil 566 } 567 if b == '\n' { 568 jr.line++ 569 } 570 prev = b 571 } 572 } 573 574 // skipUTF8BOM does what it says, if a UTF-8 BOM is present 575 func (jr *jsonReader) skipUTF8BOM() { 576 lead, err := jr.r.Peek(3) 577 if err != nil { 578 return 579 } 580 581 if len(lead) > 2 && lead[0] == 0xef && lead[1] == 0xbb && lead[2] == 0xbf { 582 jr.readByte() 583 jr.readByte() 584 jr.readByte() 585 } 586 } 587 588 // outputByte is a small wrapper on func WriteByte, which adapts any error 589 // into a custom dummy output-error, which is in turn meant to be ignored, 590 // being just an excuse to quit the app immediately and successfully 591 func outputByte(w *bufio.Writer, b byte) error { 592 err := w.WriteByte(b) 593 if err == nil { 594 return nil 595 } 596 return io.EOF 597 } 598 599 // handleArray handles arrays for func handleValue 600 func handleArray(w *bufio.Writer, jr *jsonReader, start byte) error { 601 if err := jr.demandSyntax(start); err != nil { 602 return err 603 } 604 605 var end byte = ']' 606 if start == '(' { 607 end = ')' 608 } 609 610 w.WriteByte('[') 611 612 for n := 0; true; n++ { 613 // there may be whitespace/comments before the next comma 614 if err := jr.seekNext(); err != nil { 615 return err 616 } 617 618 // handle commas between values, as well as trailing ones 619 comma := false 620 b, _ := jr.peekByte() 621 if b == ',' { 622 jr.readByte() 623 comma = true 624 625 // there may be whitespace/comments before an ending ']' 626 if err := jr.seekNext(); err != nil { 627 return err 628 } 629 b, _ = jr.peekByte() 630 } 631 632 // handle end of array 633 if b == end { 634 jr.readByte() 635 w.WriteByte(']') 636 return nil 637 } 638 639 // don't forget commas between adjacent values 640 if n > 0 { 641 if !comma { 642 return errNoArrayComma 643 } 644 if err := outputByte(w, ','); err != nil { 645 return err 646 } 647 } 648 649 // handle the next value 650 if err := jr.seekNext(); err != nil { 651 return err 652 } 653 if err := handleValue(w, jr); err != nil { 654 return err 655 } 656 } 657 658 // make the compiler happy 659 return nil 660 } 661 662 // handleDigits helps various number-handling funcs do their job 663 func handleDigits(w *bufio.Writer, jr *jsonReader) error { 664 if trySimpleDigits(w, jr) { 665 return nil 666 } 667 668 for n := 0; true; n++ { 669 b, _ := jr.peekByte() 670 671 // support `nice` long numbers by ignoring their underscores 672 if b == '_' { 673 jr.readByte() 674 continue 675 } 676 677 if '0' <= b && b <= '9' { 678 jr.readByte() 679 w.WriteByte(b) 680 continue 681 } 682 683 if n == 0 { 684 return errNoDigits 685 } 686 return nil 687 } 688 689 // make the compiler happy 690 return nil 691 } 692 693 // trySimpleDigits tries to handle (more quickly) digit-runs where all bytes 694 // are just digits: this is a very common case for numbers; returns whether 695 // it succeeded, so this func's caller knows knows if it needs to do anything, 696 // the slower way 697 func trySimpleDigits(w *bufio.Writer, jr *jsonReader) (gotIt bool) { 698 chunk, _ := jr.r.Peek(chunkPeekSize) 699 700 for i, b := range chunk { 701 if '0' <= b && b <= '9' { 702 continue 703 } 704 705 if i == 0 || b == '_' { 706 return false 707 } 708 709 // bulk-writing the chunk is this func's whole point 710 w.Write(chunk[:i]) 711 712 jr.r.Discard(i) 713 jr.pos += i 714 return true 715 } 716 717 // maybe the digits-run is ok, but it's just longer than the chunk 718 return false 719 } 720 721 // handleDot handles pseudo-JSON numbers which start with a decimal dot 722 func handleDot(w *bufio.Writer, jr *jsonReader) error { 723 if err := jr.demandSyntax('.'); err != nil { 724 return err 725 } 726 w.Write([]byte{'0', '.'}) 727 return handleDigits(w, jr) 728 } 729 730 // handleKey is used by func handleObjects and generalizes func handleString, 731 // by allowing unquoted object keys; it's not used anywhere else, as allowing 732 // unquoted string values is ambiguous with actual JSON-keyword values null, 733 // false, and true. 734 func handleKey(w *bufio.Writer, jr *jsonReader) error { 735 quote, ok := jr.peekByte() 736 if !ok { 737 return jr.improveError(errStringEarlyEnd) 738 } 739 740 if quote == '"' || quote == '\'' { 741 return handleString(w, jr, quote) 742 } 743 744 w.WriteByte('"') 745 for { 746 if b, _ := jr.peekByte(); isIdentifier[b] { 747 jr.readByte() 748 w.WriteByte(b) 749 continue 750 } 751 752 w.WriteByte('"') 753 return nil 754 } 755 } 756 757 // trySimpleString tries to handle (more quickly) inner-strings where all bytes 758 // are unescaped ASCII symbols: this is a very common case for strings, and is 759 // almost always the case for object keys; returns whether it succeeded, so 760 // this func's caller knows knows if it needs to do anything, the slower way 761 func trySimpleString(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) { 762 end := -1 763 chunk, _ := jr.r.Peek(chunkPeekSize) 764 765 for i, b := range chunk { 766 if 32 <= b && b <= 127 && b != '\\' && b != '\'' && b != '"' { 767 continue 768 } 769 770 if b == byte(quote) { 771 end = i 772 break 773 } 774 return false 775 } 776 777 if end < 0 { 778 return false 779 } 780 781 // bulk-writing the chunk is this func's whole point 782 w.WriteByte('"') 783 w.Write(chunk[:end]) 784 w.WriteByte('"') 785 786 jr.r.Discard(end + 1) 787 jr.pos += end + 1 788 return true 789 } 790 791 // handleKeyword is used by funcs handleFalse, handleNull, and handleTrue 792 func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error { 793 for rest := kw; len(rest) > 0; rest = rest[1:] { 794 b, err := jr.readByte() 795 if err == nil && b == rest[0] { 796 // keywords given to this func have no line-feeds 797 jr.pos++ 798 continue 799 } 800 801 msg := `expected JSON value ` + string(kw) 802 return jr.improveError(errors.New(msg)) 803 } 804 805 w.Write(kw) 806 return nil 807 } 808 809 func replaceKeyword(w *bufio.Writer, jr *jsonReader, kw, with []byte) error { 810 for rest := kw; len(rest) > 0; rest = rest[1:] { 811 b, err := jr.readByte() 812 if err == nil && b == rest[0] { 813 // keywords given to this func have no line-feeds 814 jr.pos++ 815 continue 816 } 817 818 msg := `expected JSON value ` + string(kw) 819 return jr.improveError(errors.New(msg)) 820 } 821 822 w.Write(with) 823 return nil 824 } 825 826 // handleNegative handles numbers starting with a negative sign for func 827 // handleValue 828 func handleNegative(w *bufio.Writer, jr *jsonReader) error { 829 if err := jr.demandSyntax('-'); err != nil { 830 return err 831 } 832 833 w.WriteByte('-') 834 if b, _ := jr.peekByte(); b == '.' { 835 jr.readByte() 836 w.Write([]byte{'0', '.'}) 837 return handleDigits(w, jr) 838 } 839 return handleNumber(w, jr) 840 } 841 842 // handleNumber handles numeric values/tokens, including invalid-JSON cases, 843 // such as values starting with a decimal dot 844 func handleNumber(w *bufio.Writer, jr *jsonReader) error { 845 // handle integer digits 846 if err := handleDigits(w, jr); err != nil { 847 return err 848 } 849 850 // handle optional decimal digits, starting with a leading dot 851 if b, _ := jr.peekByte(); b == '.' { 852 jr.readByte() 853 w.WriteByte('.') 854 return handleDigits(w, jr) 855 } 856 857 // handle optional exponent digits 858 if b, _ := jr.peekByte(); b == 'e' || b == 'E' { 859 jr.readByte() 860 w.WriteByte(b) 861 b, _ = jr.peekByte() 862 if b == '+' { 863 jr.readByte() 864 } else if b == '-' { 865 w.WriteByte('-') 866 jr.readByte() 867 } 868 return handleDigits(w, jr) 869 } 870 871 return nil 872 } 873 874 // handleObject handles objects for func handleValue 875 func handleObject(w *bufio.Writer, jr *jsonReader) error { 876 if err := jr.demandSyntax('{'); err != nil { 877 return err 878 } 879 w.WriteByte('{') 880 881 for npairs := 0; true; npairs++ { 882 // there may be whitespace/comments before the next comma 883 if err := jr.seekNext(); err != nil { 884 return err 885 } 886 887 // handle commas between key-value pairs, as well as trailing ones 888 comma := false 889 b, _ := jr.peekByte() 890 if b == ',' { 891 jr.readByte() 892 comma = true 893 894 // there may be whitespace/comments before an ending '}' 895 if err := jr.seekNext(); err != nil { 896 return err 897 } 898 b, _ = jr.peekByte() 899 } 900 901 // handle end of object 902 if b == '}' { 903 jr.readByte() 904 w.WriteByte('}') 905 return nil 906 } 907 908 // don't forget commas between adjacent key-value pairs 909 if npairs > 0 { 910 if !comma { 911 return errNoObjectComma 912 } 913 if err := outputByte(w, ','); err != nil { 914 return err 915 } 916 } 917 918 // handle the next pair's key 919 if err := jr.seekNext(); err != nil { 920 return err 921 } 922 if err := handleKey(w, jr); err != nil { 923 return err 924 } 925 926 // demand a colon right after the key 927 if err := jr.seekNext(); err != nil { 928 return err 929 } 930 if err := jr.demandSyntax(':'); err != nil { 931 return err 932 } 933 w.WriteByte(':') 934 935 // handle the next pair's value 936 if err := jr.seekNext(); err != nil { 937 return err 938 } 939 if err := handleValue(w, jr); err != nil { 940 return err 941 } 942 } 943 944 // make the compiler happy 945 return nil 946 } 947 948 // handlePositive handles numbers starting with a positive sign for func 949 // handleValue 950 func handlePositive(w *bufio.Writer, jr *jsonReader) error { 951 if err := jr.demandSyntax('+'); err != nil { 952 return err 953 } 954 955 // valid JSON isn't supposed to have leading pluses on numbers, so 956 // emit nothing for it, unlike for negative numbers 957 958 if b, _ := jr.peekByte(); b == '.' { 959 jr.readByte() 960 w.Write([]byte{'0', '.'}) 961 return handleDigits(w, jr) 962 } 963 return handleNumber(w, jr) 964 } 965 966 // handleString handles strings for funcs handleValue and handleObject, and 967 // supports both single-quotes and double-quotes, always emitting the latter 968 // in the output, of course 969 func handleString(w *bufio.Writer, jr *jsonReader, quote byte) error { 970 if quote != '"' && quote != '\'' { 971 return errNoStringQuote 972 } 973 974 jr.readByte() 975 976 // try the quicker no-escapes ASCII handler 977 if trySimpleString(w, jr, quote) { 978 return nil 979 } 980 981 // it's a non-trivial inner-string, so handle it byte-by-byte 982 w.WriteByte('"') 983 escaped := false 984 985 for quote := rune(quote); true; { 986 r, err := jr.readRune() 987 if r == unicode.ReplacementChar { 988 return jr.improveError(errInvalidRune) 989 } 990 if err != nil { 991 if err == io.EOF { 992 return jr.improveError(errStringEarlyEnd) 993 } 994 return jr.improveError(err) 995 } 996 997 if !escaped { 998 if r == '\\' { 999 escaped = true 1000 continue 1001 } 1002 1003 // handle end of string 1004 if r == quote { 1005 return outputByte(w, '"') 1006 } 1007 1008 if r <= 127 { 1009 w.Write(escapedStringBytes[byte(r)]) 1010 } else { 1011 w.WriteRune(r) 1012 } 1013 continue 1014 } 1015 1016 // handle escaped items 1017 escaped = false 1018 1019 switch r { 1020 case 'u': 1021 // \u needs exactly 4 hex-digits to follow it 1022 w.Write([]byte{'\\', 'u'}) 1023 if err := copyHex(w, 4, jr); err != nil { 1024 return jr.improveError(err) 1025 } 1026 1027 case 'x': 1028 // JSON only supports 4 escaped hex-digits, so pad the 2 1029 // expected hex-digits with 2 zeros 1030 w.Write([]byte{'\\', 'u', '0', '0'}) 1031 if err := copyHex(w, 2, jr); err != nil { 1032 return jr.improveError(err) 1033 } 1034 1035 case 't', 'f', 'r', 'n', 'b', '\\', '"': 1036 // handle valid-JSON escaped string sequences 1037 w.WriteByte('\\') 1038 w.WriteByte(byte(r)) 1039 1040 case '\'': 1041 // escaped single-quotes aren't standard JSON, but they can 1042 // be handy when the input uses non-standard single-quoted 1043 // strings 1044 w.WriteByte('\'') 1045 1046 default: 1047 if r <= 127 { 1048 w.Write(escapedStringBytes[byte(r)]) 1049 } else { 1050 w.WriteRune(r) 1051 } 1052 } 1053 } 1054 1055 return nil 1056 } 1057 1058 // copyHex handles a run of hex-digits for func handleString, starting right 1059 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its 1060 // errors with position info: that's up to the caller 1061 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error { 1062 for i := 0; i < n; i++ { 1063 b, err := jr.readByte() 1064 if err == io.EOF { 1065 return errStringEarlyEnd 1066 } 1067 if err != nil { 1068 return err 1069 } 1070 1071 if b >= 128 { 1072 return errInvalidHex 1073 } 1074 1075 if b := matchHex[b]; b != 0 { 1076 w.WriteByte(b) 1077 continue 1078 } 1079 1080 return errInvalidHex 1081 } 1082 1083 return nil 1084 } 1085 1086 // handleValue is a generic JSON-token handler, which allows the recursive 1087 // behavior to handle any kind of JSON/pseudo-JSON input 1088 func handleValue(w *bufio.Writer, jr *jsonReader) error { 1089 chunk, err := jr.r.Peek(1) 1090 if err == nil && len(chunk) >= 1 { 1091 return handleValueDispatch(w, jr, chunk[0]) 1092 } 1093 1094 if err == io.EOF { 1095 return jr.improveError(errInputEarlyEnd) 1096 } 1097 return jr.improveError(errInputEarlyEnd) 1098 } 1099 1100 // handleValueDispatch simplifies control-flow for func handleValue 1101 func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error { 1102 switch b { 1103 case '#': 1104 return jr.skipLine() 1105 case 'f': 1106 return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'}) 1107 case 'n': 1108 return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'}) 1109 case 't': 1110 return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'}) 1111 case 'F': 1112 return replaceKeyword(w, jr, []byte(`False`), []byte(`false`)) 1113 case 'N': 1114 return replaceKeyword(w, jr, []byte(`None`), []byte(`null`)) 1115 case 'T': 1116 return replaceKeyword(w, jr, []byte(`True`), []byte(`true`)) 1117 case '.': 1118 return handleDot(w, jr) 1119 case '+': 1120 return handlePositive(w, jr) 1121 case '-': 1122 return handleNegative(w, jr) 1123 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 1124 return handleNumber(w, jr) 1125 case '\'', '"': 1126 return handleString(w, jr, b) 1127 case '[', '(': 1128 return handleArray(w, jr, b) 1129 case '{': 1130 return handleObject(w, jr) 1131 default: 1132 return jr.improveError(errInvalidToken) 1133 } 1134 } 1135 1136 // escapedStringBytes helps func handleString treat all string bytes quickly 1137 // and correctly, using their officially-supported JSON escape sequences 1138 // 1139 // https://www.rfc-editor.org/rfc/rfc8259#section-7 1140 var escapedStringBytes = [256][]byte{ 1141 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 1142 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 1143 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 1144 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 1145 {'\\', 'b'}, {'\\', 't'}, 1146 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 1147 {'\\', 'f'}, {'\\', 'r'}, 1148 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 1149 {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 1150 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 1151 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 1152 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 1153 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 1154 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 1155 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 1156 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 1157 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 1158 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 1159 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 1160 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 1161 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 1162 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 1163 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 1164 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 1165 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 1166 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 1167 {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119}, 1168 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 1169 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 1170 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 1171 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 1172 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 1173 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 1174 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 1175 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 1176 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 1177 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 1178 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 1179 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 1180 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 1181 {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231}, 1182 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 1183 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 1184 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 1185 }