/*
The MIT License (MIT)

Copyright © 2024 pacman64

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal
in the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

/*
Single-file source-code for j0: this version has no http(s) support. Even
the unit-tests from the original j0 are omitted.

To compile a smaller-sized command-line app, you can use the `go` command as
follows:

go build -ldflags "-s -w" -trimpath j0.go
*/

package main

import (
	"bufio"
	"bytes"
	"errors"
	"io"
	"os"
	"strconv"
)

// info is the help message: its leading line-feed is skipped when shown
const info = `
j0 [options...] [file...]


Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output.
Its output is always a single line, which ends with a line-feed.

Besides minimizing bytes, this tool also adapts almost-JSON input into
valid JSON, since it

    - ignores both rest-of-line and multi-line comments
    - ignores extra/trailing commas in arrays and objects
    - turns single-quoted strings/keys into double-quoted strings
    - double-quotes unquoted object keys
    - changes \x 2-hex-digit into \u 4-hex-digit string-escapes

All options available can either start with a single or a double-dash

    -h        show this help message
    -help     show this help message
    -jsonl    emit JSON Lines, when top-level value is an array
`

const (
	// bufSize is the size of both the input and the output buffers
	bufSize = 32 * 1024

	// errorStyle is the ANSI escape sequence which starts error messages
	errorStyle = "\x1b[31m"

	// resetStyle is the ANSI escape sequence which ends error messages
	resetStyle = "\x1b[0m"
)

// main picks the handler from the (optional) leading option, then converts
// either the (optional) named file or standard input into standard output
func main() {
	args := os.Args[1:]
	var handler handlerFunc = json0

	if len(args) > 0 {
		switch args[0] {
		case `-h`, `--h`, `-help`, `--help`:
			os.Stderr.WriteString(info[1:])
			return
		case `-jsonl`, `--jsonl`:
			handler = jsonl
			args = args[1:]
		}
	}

	if len(args) > 1 {
		fail(`only 1 (optional) named input is supported`)
	}

	name := `-`
	if len(args) == 1 {
		name = args[0]
	}

	if err := run(os.Stdout, name, handler); isActualError(err) {
		fail(err.Error())
	}
}

// fail shows the message given as a styled error line on standard error,
// then quits the app with a failure exit-code
func fail(msg string) {
	os.Stderr.WriteString(errorStyle + msg + resetStyle + "\n")
	os.Exit(1)
}

// handlerFunc is the type shared by the top-level converters json0 and jsonl
type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error

// run converts the input named, using the handler given: an empty name or a
// dash stand for standard input; output is buffered, and flushed on return
func run(w io.Writer, name string, handler handlerFunc) error {
	var src io.Reader = os.Stdin

	if name != `` && name != `-` {
		f, err := os.Open(name)
		if err != nil {
			return errors.New(`can't read from file named "` + name + `"`)
		}
		defer f.Close()
		src = f
	}

	bw := bufio.NewWriterSize(w, bufSize)
	br := bufio.NewReaderSize(src, bufSize)
	defer bw.Flush()
	return handler(bw, br)
}

var (
	errCommentEarlyEnd = errors.New(`unexpected early-end of comment`)
	errInputEarlyEnd   = errors.New(`unexpected end of input data`)
	errInvalidComment  = errors.New(`expected / or *`)
	errInvalidHex      = errors.New(`expected a base-16 digit`)
	errInvalidToken    = errors.New(`invalid JSON token`)
	errNoDigits        = errors.New(`expected numeric digits`)
	errNoKey           = errors.New(`expected an object key`)
	errNoStringQuote   = errors.New(`expected " or '`)
	errNoArrayComma    = errors.New(`missing comma between array values`)
	errNoObjectComma   = errors.New(`missing comma between key-value pairs`)
	errStringEarlyEnd  = errors.New(`unexpected early-end of string`)
	errExtraBytes      = errors.New(`unexpected extra input bytes`)

	// errNoMoreOutput is a generic dummy output-error, which is meant to be
	// ultimately ignored, being just an excuse to quit the app immediately
	// and successfully
	errNoMoreOutput = errors.New(`no more output`)
)

// isActualError figures out whether an error should be shown as an error
// message, instead of being ignored
func isActualError(err error) bool {
	if err == nil {
		return false
	}
	return !errors.Is(err, io.EOF) && !errors.Is(err, errNoMoreOutput)
}

// linePosError is a more descriptive kind of error, showing the source of
// the input-related problem, as a 1-based line/pos number pair in front
// of the error message
type linePosError struct {
	// line is the 1-based line count from the input
	line int

	// pos is the 1-based `horizontal` (byte) position in its line
	pos int

	// err is the error message to `decorate` with the position info
	err error
}

// Error satisfies the error interface
func (lpe linePosError) Error() string {
	where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos)
	return where + `: ` + lpe.err.Error()
}

// Unwrap lets funcs errors.Is and errors.As reach the underlying error
func (lpe linePosError) Unwrap() error {
	return lpe.err
}

// isIdentifier improves control-flow of func handleKey, when it handles
// unquoted object keys
var isIdentifier = [256]bool{
	'_': true,

	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true,
	'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true,
	'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true,
	'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true,
	'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true,
	'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true,
	's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// matchHex both figures out if a byte is a valid ASCII hex-digit, by not
// being 0, and normalizes letter-case for the hex letters
var matchHex = [256]byte{
	'0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
	'5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
	'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F',
	'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F',
}

// json0 converts JSON/pseudo-JSON into (valid) minimal JSON, emitted as a
// single line
func json0(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}
	return handleTopLevel(w, &jr)
}

// jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines: items
// from top-level arrays are emitted one per line, while any other top-level
// value is emitted as a single line, exactly as func json0 does
func jsonl(w *bufio.Writer, r *bufio.Reader) error {
	jr := jsonReader{r: r, line: 1, pos: 1}
	defer w.Flush()

	if err := jr.handleLeadingJunk(); err != nil {
		return err
	}

	if b, ok := jr.peekByte(); ok && b == '[' {
		if err := handleArrayJSONL(w, &jr); err != nil {
			return err
		}
		return jr.handleTrailingJunk()
	}
	return handleTopLevel(w, &jr)
}

// handleTopLevel handles a single top-level JSON value, followed by only
// (optional) trailing whitespace/comments/semicolon, for funcs json0/jsonl
func handleTopLevel(w *bufio.Writer, jr *jsonReader) error {
	err := handleValue(w, jr)

	// end the only output-line with a line-feed, even on failure: this
	// avoids showing error messages on the same line as the main output,
	// since JSON-0 output has no line-feeds before its last byte
	outErr := outputByte(w, '\n')

	if err != nil {
		return err
	}
	if outErr != nil {
		return outErr
	}
	return jr.handleTrailingJunk()
}

// handleArrayJSONL handles top-level arrays for func jsonl, emitting each
// array item on its own line, and flushing the output after each line
func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}

	for n := 0; ; n++ {
		comma, done, err := jr.nextItem(']')
		if err != nil {
			return err
		}

		if n > 0 && (done || comma) {
			// turn commas between adjacent values into line-feeds, and
			// end the last line as well
			if err := outputByte(w, '\n'); err != nil {
				return err
			}
			w.Flush()
		}

		if done {
			return nil
		}
		if n > 0 && !comma {
			return jr.improveError(errNoArrayComma)
		}

		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// jsonReader reads data via a buffer, keeping track of the input position:
// this in turn allows showing much more useful errors, when these happen
type jsonReader struct {
	// r is the actual reader
	r *bufio.Reader

	// line is the 1-based line-counter for input bytes, and gives errors
	// useful position info
	line int

	// pos is the 1-based `horizontal` (byte) position in its line, and
	// gives errors useful position info
	pos int
}

// improveError makes any error more useful, by giving it info about the
// current input-position, as a 1-based line/within-line-position pair;
// errors which already have position info are returned as given, io.EOF
// turns into an early-end-of-input error, and nil stays nil
func (jr *jsonReader) improveError(err error) error {
	if err == nil {
		return nil
	}
	if _, ok := err.(linePosError); ok {
		return err
	}
	if err == io.EOF {
		err = errInputEarlyEnd
	}
	return linePosError{line: jr.line, pos: jr.pos, err: err}
}

// handleLeadingJunk skips anything allowed before the top-level value
func (jr *jsonReader) handleLeadingJunk() error {
	// input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order
	// mark) gives no useful info if present, as UTF-8 leaves no ambiguity
	// about byte-order by design
	jr.skipUTF8BOM()

	// ignore leading whitespace and/or comments
	return jr.seekNext()
}

// handleTrailingJunk skips anything allowed after the top-level value, and
// fails when any other input bytes follow it
func (jr *jsonReader) handleTrailingJunk() error {
	// ignore trailing whitespace and/or comments
	if err := jr.seekNext(); err != nil {
		return err
	}

	// ignore a single trailing semicolon, along with any whitespace and/or
	// comments which follow it
	if b, ok := jr.peekByte(); ok && b == ';' {
		jr.skipByte()
		if err := jr.seekNext(); err != nil {
			return err
		}
	}

	// beyond trailing whitespace and/or comments, any more bytes
	// make the whole input data invalid JSON
	if _, ok := jr.peekByte(); ok {
		return jr.improveError(errExtraBytes)
	}
	return nil
}

// demandSyntax fails with an error when the next byte isn't the one given;
// when it is, the byte is then read/skipped, and a nil error is returned
func (jr *jsonReader) demandSyntax(syntax byte) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// func improveError turns io.EOF into an early-end error
		return jr.improveError(err)
	}

	if len(chunk) < 1 || chunk[0] != syntax {
		msg := `expected ` + string(rune(syntax))
		return jr.improveError(errors.New(msg))
	}

	jr.skipByte()
	return nil
}

// nextItem is used right before each potential item in arrays/objects: it
// skips whitespace/comments and an optional comma, which can also be a
// trailing one, reporting whether a comma was found, and whether the
// closing byte given was reached, in which case that's skipped as well
func (jr *jsonReader) nextItem(end byte) (comma bool, done bool, err error) {
	// there may be whitespace/comments before the next comma
	if err := jr.seekNext(); err != nil {
		return false, false, err
	}

	b, ok := jr.peekByte()
	if ok && b == ',' {
		jr.skipByte()
		comma = true

		// there may be whitespace/comments before the closing byte
		if err := jr.seekNext(); err != nil {
			return comma, false, err
		}
		b, ok = jr.peekByte()
	}

	if !ok {
		return comma, false, jr.improveError(io.EOF)
	}
	if b == end {
		jr.skipByte()
		return comma, true, nil
	}
	return comma, false, nil
}

// updatePosInfo does what it says, given the byte just read separately
func (jr *jsonReader) updatePosInfo(b byte) {
	if b == '\n' {
		jr.line++
		jr.pos = 1
		return
	}
	jr.pos++
}

// peekByte simplifies control-flow for various other funcs: the byte it
// gives is 0 when there's no next byte
func (jr *jsonReader) peekByte() (b byte, ok bool) {
	chunk, err := jr.r.Peek(1)
	if err != nil || len(chunk) < 1 {
		return 0, false
	}
	return chunk[0], true
}

// readByte does what it says, updating the reader's position info
func (jr *jsonReader) readByte() (b byte, err error) {
	b, err = jr.r.ReadByte()
	if err != nil {
		return 0, jr.improveError(err)
	}
	jr.updatePosInfo(b)
	return b, nil
}

// skipByte reads and ignores the next byte, updating the position info:
// it's meant for bytes its callers have already peeked at, so reading
// those can't fail
func (jr *jsonReader) skipByte() {
	_, _ = jr.readByte()
}

// seekNext skips/seeks the next token, ignoring runs of whitespace symbols
// and comments, either single-line (starting with //) or general (starting
// with /* and ending with */)
func (jr *jsonReader) seekNext() error {
	for {
		b, ok := jr.peekByte()

		switch {
		case !ok:
			// end of input
			return nil

		case b <= ' ':
			// keep skipping whitespace (and other control) bytes
			jr.skipByte()

		case b == '/':
			// after comments, keep looking for more whitespace/comments
			if err := jr.skipComment(); err != nil {
				return err
			}

		default:
			// reached the next token
			return nil
		}
	}
}

// skipComment helps func seekNext skip over comments, simplifying the latter
// func's control-flow
func (jr *jsonReader) skipComment() error {
	if err := jr.demandSyntax('/'); err != nil {
		return err
	}

	b, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errInputEarlyEnd)
	}

	switch b {
	case '/':
		// handle single-line comments
		return jr.skipLine()
	case '*':
		// handle (potentially) multi-line comments
		return jr.skipGeneralComment()
	default:
		return jr.improveError(errInvalidComment)
	}
}

// skipLine handles single-line comments for func skipComment, by skipping
// all bytes up to, and including, the next line-feed
func (jr *jsonReader) skipLine() error {
	for {
		b, err := jr.r.ReadByte()
		if err == io.EOF {
			// end of input is fine in this case
			return nil
		}
		if err != nil {
			return jr.improveError(err)
		}

		jr.updatePosInfo(b)
		if b == '\n' {
			return nil
		}
	}
}

// skipGeneralComment handles (potentially) multi-line comments for func
// skipComment, and expects to start right at the asterisk which follows
// the leading slash
func (jr *jsonReader) skipGeneralComment() error {
	// skipping the leading asterisk first ensures it can't also be part
	// of the comment-ender, so `/*/` doesn't count as a whole comment
	if err := jr.demandSyntax('*'); err != nil {
		return err
	}

	var prev byte
	for {
		b, err := jr.readByte()
		if err != nil {
			return jr.improveError(errCommentEarlyEnd)
		}

		if prev == '*' && b == '/' {
			return nil
		}
		prev = b
	}
}

// skipUTF8BOM does what it says, if a UTF-8 BOM is present
func (jr *jsonReader) skipUTF8BOM() {
	lead, err := jr.r.Peek(3)
	if err != nil || !bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) {
		return
	}

	// positions count bytes, so the BOM bytes are counted exactly once
	if n, err := jr.r.Discard(3); err == nil {
		jr.pos += n
	}
}

// outputByte is a small wrapper on func WriteByte, which adapts any error
// into a custom dummy output-error, which is in turn meant to be ignored,
// being just an excuse to quit the app immediately and successfully
func outputByte(w *bufio.Writer, b byte) error {
	if err := w.WriteByte(b); err != nil {
		return errNoMoreOutput
	}
	return nil
}

// handleArray handles arrays for func handleValue; errors from unchecked
// writes are remembered by the buffered writer, so they're still caught by
// later calls to func outputByte
func handleArray(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('['); err != nil {
		return err
	}
	w.WriteByte('[')

	for n := 0; ; n++ {
		comma, done, err := jr.nextItem(']')
		if err != nil {
			return err
		}
		if done {
			return outputByte(w, ']')
		}

		// don't forget commas between adjacent values
		if n > 0 {
			if !comma {
				return jr.improveError(errNoArrayComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handleDigits helps various number-handling funcs do their job: it copies
// a run of decimal digits, and fails when there isn't even one
func handleDigits(w *bufio.Writer, jr *jsonReader) error {
	digits := 0

	for {
		b, _ := jr.peekByte()

		switch {
		case b == '_':
			// support `nice` long numbers by ignoring their underscores
			jr.skipByte()

		case '0' <= b && b <= '9':
			jr.skipByte()
			w.WriteByte(b)
			digits++

		case digits == 0:
			return jr.improveError(errNoDigits)

		default:
			return nil
		}
	}
}

// handleDot handles pseudo-JSON numbers which start with a decimal dot, by
// emitting a leading 0 before the dot
func handleDot(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('.'); err != nil {
		return err
	}

	w.Write([]byte{'0', '.'})
	if err := handleDigits(w, jr); err != nil {
		return err
	}
	return handleExponent(w, jr)
}

// handleExponent handles the optional exponent which can end numbers: the
// sign of positive exponents isn't emitted, since it's optional in JSON
func handleExponent(w *bufio.Writer, jr *jsonReader) error {
	b, _ := jr.peekByte()
	if b != 'e' && b != 'E' {
		return nil
	}

	jr.skipByte()
	w.WriteByte(b)

	switch sign, _ := jr.peekByte(); sign {
	case '+':
		jr.skipByte()
	case '-':
		jr.skipByte()
		w.WriteByte('-')
	}
	return handleDigits(w, jr)
}

// handleKey is used by func handleObject and generalizes func handleString,
// by allowing unquoted object keys; it's not used anywhere else, as allowing
// unquoted string values is ambiguous with actual JSON-keyword values null,
// false, and true.
func handleKey(w *bufio.Writer, jr *jsonReader) error {
	b, ok := jr.peekByte()
	if !ok {
		return jr.improveError(errStringEarlyEnd)
	}
	if b == '"' || b == '\'' {
		return handleString(w, jr)
	}

	// unquoted keys need at least 1 byte in them
	if !isIdentifier[b] {
		return jr.improveError(errNoKey)
	}

	w.WriteByte('"')
	for {
		// the end of input gives a 0 byte, which also ends the key
		b, _ := jr.peekByte()
		if !isIdentifier[b] {
			return outputByte(w, '"')
		}
		jr.skipByte()
		w.WriteByte(b)
	}
}

// trySimpleInner tries to handle (more quickly) inner-strings where all bytes
// are unescaped ASCII symbols which need no escaping in the output: this is
// a very common case for strings, and is almost always the case for object
// keys; returns whether it succeeded, so this func's caller knows if it
// needs to do anything, the slower way; nothing is read/written on failure
func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) {
	chunk, _ := jr.r.Peek(64)

	for i, b := range chunk {
		if b == quote {
			// bulk-writing the chunk is this func's whole point
			w.WriteByte('"')
			w.Write(chunk[:i])
			w.WriteByte('"')

			// no line-feeds are possible among the bytes skipped
			n, _ := jr.r.Discard(i + 1)
			jr.pos += n
			return true
		}

		// double-quotes inside single-quoted strings need escaping
		if b < 32 || b > 127 || b == '\\' || b == '"' {
			return false
		}
	}

	// maybe the inner-string is ok, but it's just longer than the chunk
	return false
}

// handleKeyword is used by func handleValueDispatch to handle values false,
// null, and true
func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error {
	for _, expected := range kw {
		b, ok := jr.peekByte()
		if !ok || b != expected {
			msg := `expected JSON value ` + string(kw)
			return jr.improveError(errors.New(msg))
		}
		jr.skipByte()
	}

	w.Write(kw)
	return nil
}

// handleNegative handles numbers starting with a negative sign for func
// handleValue
func handleNegative(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('-'); err != nil {
		return err
	}

	w.WriteByte('-')
	return handleNumber(w, jr)
}

// handleNumber handles unsigned numeric values/tokens, including
// invalid-JSON cases, such as values starting with a decimal dot
func handleNumber(w *bufio.Writer, jr *jsonReader) error {
	if b, _ := jr.peekByte(); b == '.' {
		return handleDot(w, jr)
	}

	// handle integer digits
	if err := handleDigits(w, jr); err != nil {
		return err
	}

	// handle optional decimal digits, starting with a leading dot
	if b, _ := jr.peekByte(); b == '.' {
		jr.skipByte()
		w.WriteByte('.')
		if err := handleDigits(w, jr); err != nil {
			return err
		}
	}

	// exponents are optional, with or without decimal digits before them
	return handleExponent(w, jr)
}

// handleObject handles objects for func handleValue; errors from unchecked
// writes are remembered by the buffered writer, so they're still caught by
// later calls to func outputByte
func handleObject(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('{'); err != nil {
		return err
	}
	w.WriteByte('{')

	for npairs := 0; ; npairs++ {
		comma, done, err := jr.nextItem('}')
		if err != nil {
			return err
		}
		if done {
			return outputByte(w, '}')
		}

		// don't forget commas between adjacent key-value pairs
		if npairs > 0 {
			if !comma {
				return jr.improveError(errNoObjectComma)
			}
			if err := outputByte(w, ','); err != nil {
				return err
			}
		}

		// handle the next pair's key
		if err := handleKey(w, jr); err != nil {
			return err
		}

		// demand a colon right after the key
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := jr.demandSyntax(':'); err != nil {
			return err
		}
		w.WriteByte(':')

		// handle the next pair's value
		if err := jr.seekNext(); err != nil {
			return err
		}
		if err := handleValue(w, jr); err != nil {
			return err
		}
	}
}

// handlePositive handles numbers starting with a positive sign for func
// handleValue
func handlePositive(w *bufio.Writer, jr *jsonReader) error {
	if err := jr.demandSyntax('+'); err != nil {
		return err
	}

	// valid JSON isn't supposed to have leading pluses on numbers, so
	// emit nothing for it, unlike for negative numbers
	return handleNumber(w, jr)
}

// handleString handles strings for funcs handleValue and handleKey, and
// supports both single-quotes and double-quotes, always emitting the latter
// in the output, of course
func handleString(w *bufio.Writer, jr *jsonReader) error {
	quote, ok := jr.peekByte()
	if !ok || (quote != '"' && quote != '\'') {
		return jr.improveError(errNoStringQuote)
	}
	jr.skipByte()

	// try the quicker all-unescaped-ASCII handler
	if trySimpleInner(w, jr, quote) {
		return nil
	}

	// it's a non-trivial inner-string, so handle it byte-by-byte
	w.WriteByte('"')
	escaped := false

	for {
		b, err := jr.r.ReadByte()
		if err == io.EOF {
			return jr.improveError(errStringEarlyEnd)
		}
		if err != nil {
			return jr.improveError(err)
		}
		jr.updatePosInfo(b)

		if !escaped {
			switch b {
			case '\\':
				escaped = true
			case quote:
				// handle end of string
				return outputByte(w, '"')
			default:
				w.Write(escapedStringBytes[b])
			}
			continue
		}

		// handle escaped items
		escaped = false

		switch b {
		case 'u':
			// \u needs exactly 4 hex-digits to follow it
			w.Write([]byte{'\\', 'u'})
			if err := copyHex(w, 4, jr); err != nil {
				return jr.improveError(err)
			}

		case 'x':
			// JSON only supports 4 escaped hex-digits, so pad the 2
			// expected hex-digits with 2 zeros
			w.Write([]byte{'\\', 'u', '0', '0'})
			if err := copyHex(w, 2, jr); err != nil {
				return jr.improveError(err)
			}

		case 't', 'f', 'r', 'n', 'b', '\\', '"':
			// handle valid-JSON escaped string sequences
			w.WriteByte('\\')
			w.WriteByte(b)

		default:
			// any other escaped byte stands for itself: this includes
			// escaped slashes, and escaped single-quotes, which aren't
			// standard JSON, but are handy in single-quoted strings
			w.Write(escapedStringBytes[b])
		}
	}
}

// copyHex handles a run of hex-digits for func handleString, starting right
// after the leading `\u` (or `\x`) part; this func doesn't `improve` its
// errors with position info: that's up to the caller
func copyHex(w *bufio.Writer, n int, jr *jsonReader) error {
	for i := 0; i < n; i++ {
		b, err := jr.r.ReadByte()
		if err == io.EOF {
			return errStringEarlyEnd
		}
		if err != nil {
			return err
		}
		jr.updatePosInfo(b)

		hex := matchHex[b]
		if hex == 0 {
			return errInvalidHex
		}
		w.WriteByte(hex)
	}

	return nil
}

// handleValue is a generic JSON-token handler, which allows the recursive
// behavior to handle any kind of JSON/pseudo-JSON input
func handleValue(w *bufio.Writer, jr *jsonReader) error {
	chunk, err := jr.r.Peek(1)
	if err != nil {
		// func improveError turns io.EOF into an early-end error
		return jr.improveError(err)
	}
	if len(chunk) < 1 {
		return jr.improveError(errInputEarlyEnd)
	}
	return handleValueDispatch(w, jr, chunk[0])
}

// handleValueDispatch simplifies control-flow for func handleValue, picking
// the right handler from the leading byte of the next value
func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error {
	switch b {
	case 'f':
		return handleKeyword(w, jr, []byte(`false`))
	case 'n':
		return handleKeyword(w, jr, []byte(`null`))
	case 't':
		return handleKeyword(w, jr, []byte(`true`))
	case '.':
		return handleDot(w, jr)
	case '+':
		return handlePositive(w, jr)
	case '-':
		return handleNegative(w, jr)
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return handleNumber(w, jr)
	case '\'', '"':
		return handleString(w, jr)
	case '[':
		return handleArray(w, jr)
	case '{':
		return handleObject(w, jr)
	default:
		return jr.improveError(errInvalidToken)
	}
}

// escapedStringBytes helps func handleString treat all string bytes quickly
// and correctly, using their officially-supported JSON escape sequences
//
// https://www.rfc-editor.org/rfc/rfc8259#section-7
var escapedStringBytes = makeEscapedStringBytes()

// makeEscapedStringBytes builds the lookup table for escapedStringBytes:
// control bytes turn into escape sequences, double-quotes and backslashes
// are escaped, and all other bytes stand for themselves
func makeEscapedStringBytes() [256][]byte {
	const hexDigits = `0123456789abcdef`

	var table [256][]byte
	for i := range table {
		b := byte(i)
		if b < 32 {
			hi, lo := hexDigits[b>>4], hexDigits[b&0x0f]
			table[i] = []byte{'\\', 'u', '0', '0', hi, lo}
			continue
		}
		table[i] = []byte{b}
	}

	// some bytes have shorter dedicated escape sequences
	table['\b'] = []byte{'\\', 'b'}
	table['\t'] = []byte{'\\', 't'}
	table['\n'] = []byte{'\\', 'n'}
	table['\f'] = []byte{'\\', 'f'}
	table['\r'] = []byte{'\\', 'r'}
	table['"'] = []byte{'\\', '"'}
	table['\\'] = []byte{'\\', '\\'}
	return table
}