File: j0.go 1 /* 2 The MIT License (MIT) 3 4 Copyright © 2020-2025 pacman64 5 6 Permission is hereby granted, free of charge, to any person obtaining a copy of 7 this software and associated documentation files (the “Software”), to deal 8 in the Software without restriction, including without limitation the rights to 9 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 of the Software, and to permit persons to whom the Software is furnished to do 11 so, subject to the following conditions: 12 13 The above copyright notice and this permission notice shall be included in all 14 copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 SOFTWARE. 23 */ 24 25 /* 26 Single-file source-code for j0: this version has no http(s) support. Even 27 the unit-tests from the original j0 are omitted. 28 29 To compile a smaller-sized command-line app, you can use the `go` command as 30 follows: 31 32 go build -ldflags "-s -w" -trimpath j0.go 33 */ 34 35 package main 36 37 import ( 38 "bufio" 39 "bytes" 40 "errors" 41 "io" 42 "os" 43 "strconv" 44 "unicode" 45 ) 46 47 const info = ` 48 j0 [options...] [file...] 49 50 51 Json-0 converts/fixes JSON/pseudo-JSON input into minimal JSON output. 52 Its output is always a single line, which ends with a line-feed. 53 54 Besides minimizing bytes, this tool also adapts almost-JSON input into 55 valid JSON, since it 56 57 - ignores both rest-of-line and multi-line comments 58 - ignores extra/trailing commas in arrays and objects 59 - turns single-quoted strings/keys into double-quoted strings 60 - double-quotes unquoted object keys 61 - changes \x 2-hex-digit into \u 4-hex-digit string-escapes 62 63 All options available can either start with a single or a double-dash 64 65 -h show this help message 66 -help show this help message 67 -jsonl emit JSON Lines, when top-level value is an array 68 ` 69 70 const ( 71 bufSize = 32 * 1024 72 errorStyle = "\x1b[31m" 73 ) 74 75 func main() { 76 args := os.Args[1:] 77 handler := json0 78 79 if len(args) > 0 { 80 switch os.Args[1] { 81 case `-h`, `--h`, `-help`, `--help`: 82 os.Stderr.WriteString(info[1:]) 83 return 84 case `-jsonl`, `--jsonl`: 85 handler = jsonl 86 args = args[1:] 87 } 88 } 89 90 if len(args) > 1 { 91 const msg = `only 1 (optional) named input is supported` 92 os.Stderr.WriteString(errorStyle + msg + "\x1b[0m\n") 93 os.Exit(1) 94 } 95 96 name := `-` 97 if len(args) == 1 { 98 name = args[0] 99 } 100 101 if err := run(os.Stdout, name, handler); isActualError(err) { 102 os.Stderr.WriteString(errorStyle) 103 os.Stderr.WriteString(err.Error()) 104 os.Stderr.WriteString("\x1b[0m\n") 105 os.Exit(1) 106 } 107 } 108 109 type handlerFunc func(w *bufio.Writer, r *bufio.Reader) error 110 111 func run(w io.Writer, name string, handler handlerFunc) error { 112 if name == `` || name == `-` { 113 bw := bufio.NewWriterSize(w, bufSize) 114 br := bufio.NewReaderSize(os.Stdin, bufSize) 115 defer bw.Flush() 116 return handler(bw, br) 117 } 118 119 f, err := os.Open(name) 120 if err != nil { 121 return errors.New(`can't read from file named "` + name + `"`) 122 } 123 defer f.Close() 124 125 bw := bufio.NewWriterSize(w, bufSize) 126 br := bufio.NewReaderSize(f, bufSize) 127 defer bw.Flush() 128 return handler(bw, br) 129 } 130 131 var ( 132 errCommentEarlyEnd = errors.New(`unexpected early-end of comment`) 133 errInputEarlyEnd = errors.New(`expected end of input data`) 134 errInvalidComment = errors.New(`expected / or *`) 135 errInvalidHex = errors.New(`expected a base-16 digit`) 136 errInvalidRune = errors.New(`invalid UTF-8 bytes`) 137 errInvalidToken = errors.New(`invalid JSON token`) 138 errNoDigits = errors.New(`expected numeric digits`) 139 errNoStringQuote = errors.New(`expected " or '`) 140 errNoArrayComma = errors.New(`missing comma between array values`) 141 errNoObjectComma = errors.New(`missing comma between key-value pairs`) 142 errStringEarlyEnd = errors.New(`unexpected early-end of string`) 143 errExtraBytes = errors.New(`unexpected extra input bytes`) 144 145 // errNoMoreOutput is a generic dummy output-error, which is meant to be 146 // ultimately ignored, being just an excuse to quit the app immediately 147 // and successfully 148 errNoMoreOutput = errors.New(`no more output`) 149 ) 150 151 // isActualError is to figure out whether not to ignore an error, and thus 152 // show it as an error message 153 func isActualError(err error) bool { 154 return err != nil && err != io.EOF && err != errNoMoreOutput 155 } 156 157 // linePosError is a more descriptive kind of error, showing the source of 158 // the input-related problem, as 1-based a line/pos number pair in front 159 // of the error message 160 type linePosError struct { 161 // line is the 1-based line count from the input 162 line int 163 164 // pos is the 1-based `horizontal` position in its line 165 pos int 166 167 // err is the error message to `decorate` with the position info 168 err error 169 } 170 171 // Error satisfies the error interface 172 func (lpe linePosError) Error() string { 173 where := strconv.Itoa(lpe.line) + `:` + strconv.Itoa(lpe.pos) 174 return where + `: ` + lpe.err.Error() 175 } 176 177 // isIdentifier improves control-flow of func handleKey, when it handles 178 // unquoted object keys 179 var isIdentifier = [256]bool{ 180 '_': true, 181 182 '0': true, '1': true, '2': true, '3': true, '4': true, 183 '5': true, '6': true, '7': true, '8': true, '9': true, 184 185 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 186 'G': true, 'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 187 'M': true, 'N': true, 'O': true, 'P': true, 'Q': true, 'R': true, 188 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true, 189 'Y': true, 'Z': true, 190 191 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 192 'g': true, 'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 193 'm': true, 'n': true, 'o': true, 'p': true, 'q': true, 'r': true, 194 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true, 195 'y': true, 'z': true, 196 } 197 198 // matchHex both figures out if a byte is a valid ASCII hex-digit, by not 199 // being 0, and normalizes letter-case for the hex letters 200 var matchHex = [256]byte{ 201 '0': '0', '1': '1', '2': '2', '3': '3', '4': '4', 202 '5': '5', '6': '6', '7': '7', '8': '8', '9': '9', 203 'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 204 'a': 'A', 'b': 'B', 'c': 'C', 'd': 'D', 'e': 'E', 'f': 'F', 205 } 206 207 // json0 converts JSON/pseudo-JSON into (valid) minimal JSON 208 func json0(w *bufio.Writer, r *bufio.Reader) error { 209 jr := jsonReader{r, 1, 1} 210 defer w.Flush() 211 212 if err := jr.handleLeadingJunk(); err != nil { 213 return err 214 } 215 216 // handle a single top-level JSON value 217 err := handleValue(w, &jr) 218 219 // end the only output-line with a line-feed; this also avoids showing 220 // error messages on the same line as the main output, since JSON-0 221 // output has no line-feeds before its last byte 222 outputByte(w, '\n') 223 224 if err != nil { 225 return err 226 } 227 return jr.handleTrailingJunk() 228 } 229 230 // jsonl converts JSON/pseudo-JSON into (valid) minimal JSON Lines; this func 231 // avoids writing a trailing line-feed, leaving that up to its caller 232 func jsonl(w *bufio.Writer, r *bufio.Reader) error { 233 jr := jsonReader{r, 1, 1} 234 235 if err := jr.handleLeadingJunk(); err != nil { 236 return err 237 } 238 239 chunk, err := jr.r.Peek(1) 240 if err == nil && len(chunk) >= 1 && chunk[0] == '[' { 241 return handleArrayJSONL(w, &jr) 242 } 243 244 // handle a single top-level JSON value 245 err = handleValue(w, &jr) 246 247 // end the only output-line with a line-feed; this also avoids showing 248 // error messages on the same line as the main output, since JSON-0 249 // output has no line-feeds before its last byte 250 outputByte(w, '\n') 251 252 if err != nil { 253 return err 254 } 255 return jr.handleTrailingJunk() 256 } 257 258 // handleArrayJSONL handles top-level arrays for func jsonl 259 func handleArrayJSONL(w *bufio.Writer, jr *jsonReader) error { 260 if err := jr.demandSyntax('['); err != nil { 261 return err 262 } 263 264 for n := 0; true; n++ { 265 // there may be whitespace/comments before the next comma 266 if err := jr.seekNext(); err != nil { 267 return err 268 } 269 270 // handle commas between values, as well as trailing ones 271 comma := false 272 b, _ := jr.peekByte() 273 if b == ',' { 274 jr.readByte() 275 comma = true 276 277 // there may be whitespace/comments before an ending ']' 278 if err := jr.seekNext(); err != nil { 279 return err 280 } 281 b, _ = jr.peekByte() 282 } 283 284 // handle end of array 285 if b == ']' { 286 jr.readByte() 287 if n > 0 { 288 err := outputByte(w, '\n') 289 w.Flush() 290 return err 291 } 292 return nil 293 } 294 295 // turn commas between adjacent values into line-feeds, as the 296 // output for this custom func is supposed to be JSON Lines 297 if n > 0 { 298 if !comma { 299 return errNoArrayComma 300 } 301 if err := outputByte(w, '\n'); err != nil { 302 return err 303 } 304 w.Flush() 305 } 306 307 // handle the next value 308 if err := jr.seekNext(); err != nil { 309 return err 310 } 311 if err := handleValue(w, jr); err != nil { 312 return err 313 } 314 } 315 316 // make the compiler happy 317 return nil 318 } 319 320 // jsonReader reads data via a buffer, keeping track of the input position: 321 // this in turn allows showing much more useful errors, when these happen 322 type jsonReader struct { 323 // r is the actual reader 324 r *bufio.Reader 325 326 // line is the 1-based line-counter for input bytes, and gives errors 327 // useful position info 328 line int 329 330 // pos is the 1-based `horizontal` position in its line, and gives 331 // errors useful position info 332 pos int 333 } 334 335 // improveError makes any error more useful, by giving it info about the 336 // current input-position, as a 1-based line/within-line-position pair 337 func (jr jsonReader) improveError(err error) error { 338 if _, ok := err.(linePosError); ok { 339 return err 340 } 341 342 if err == io.EOF { 343 return linePosError{jr.line, jr.pos, errInputEarlyEnd} 344 } 345 if err != nil { 346 return linePosError{jr.line, jr.pos, err} 347 } 348 return nil 349 } 350 351 func (jr *jsonReader) handleLeadingJunk() error { 352 // input is already assumed to be UTF-8: a leading UTF-8 BOM (byte-order 353 // mark) gives no useful info if present, as UTF-8 leaves no ambiguity 354 // about byte-order by design 355 jr.skipUTF8BOM() 356 357 // ignore leading whitespace and/or comments 358 return jr.seekNext() 359 } 360 361 func (jr *jsonReader) handleTrailingJunk() error { 362 // ignore trailing whitespace and/or comments 363 if err := jr.seekNext(); err != nil { 364 return err 365 } 366 367 // ignore trailing semicolon 368 if b, ok := jr.peekByte(); ok && b == ';' { 369 jr.readByte() 370 // ignore trailing whitespace and/or comments 371 if err := jr.seekNext(); err != nil { 372 return err 373 } 374 } 375 376 // beyond trailing whitespace and/or comments, any more bytes 377 // make the whole input data invalid JSON 378 if _, ok := jr.peekByte(); ok { 379 return jr.improveError(errExtraBytes) 380 } 381 return nil 382 } 383 384 // demandSyntax fails with an error when the next byte isn't the one given; 385 // when it is, the byte is then read/skipped, and a nil error is returned 386 func (jr *jsonReader) demandSyntax(syntax byte) error { 387 chunk, err := jr.r.Peek(1) 388 if err == io.EOF { 389 return jr.improveError(errInputEarlyEnd) 390 } 391 if err != nil { 392 return jr.improveError(err) 393 } 394 395 if len(chunk) < 1 || chunk[0] != syntax { 396 msg := `expected ` + string(rune(syntax)) 397 return jr.improveError(errors.New(msg)) 398 } 399 400 jr.readByte() 401 return nil 402 } 403 404 // updatePosInfo does what it says, given the byte just read separately 405 func (jr *jsonReader) updatePosInfo(r rune) { 406 if r == '\n' { 407 jr.line += 1 408 jr.pos = 1 409 } else { 410 jr.pos++ 411 } 412 } 413 414 // peekByte simplifies control-flow for various other funcs 415 func (jr jsonReader) peekByte() (b byte, ok bool) { 416 chunk, err := jr.r.Peek(1) 417 if err == nil && len(chunk) >= 1 { 418 return chunk[0], true 419 } 420 return 0, false 421 } 422 423 // readByte does what it says, updating the reader's position info 424 func (jr *jsonReader) readByte() (b byte, err error) { 425 b, err = jr.r.ReadByte() 426 if err == nil { 427 jr.updatePosInfo(rune(b)) 428 return b, nil 429 } 430 return b, jr.improveError(err) 431 } 432 433 // readRune does what it says, updating the reader's position info 434 func (jr *jsonReader) readRune() (r rune, err error) { 435 r, _, err = jr.r.ReadRune() 436 if err == nil { 437 jr.updatePosInfo(r) 438 return r, nil 439 } 440 return r, jr.improveError(err) 441 } 442 443 // seekNext skips/seeks the next token, ignoring runs of whitespace symbols 444 // and comments, either single-line (starting with //) or general (starting 445 // with /* and ending with */) 446 func (jr *jsonReader) seekNext() error { 447 for { 448 b, ok := jr.peekByte() 449 if !ok { 450 return nil 451 } 452 453 // case ' ', '\t', '\f', '\v', '\r', '\n': 454 if b <= 32 { 455 // keep skipping whitespace bytes 456 jr.readByte() 457 continue 458 } 459 460 if b == '#' { 461 if err := jr.skipLine(); err != nil { 462 return err 463 } 464 continue 465 } 466 467 if b != '/' { 468 // reached the next token 469 return nil 470 } 471 472 if err := jr.skipComment(); err != nil { 473 return err 474 } 475 476 // after comments, keep looking for more whitespace and/or comments 477 } 478 } 479 480 // skipComment helps func seekNext skip over comments, simplifying the latter 481 // func's control-flow 482 func (jr *jsonReader) skipComment() error { 483 err := jr.demandSyntax('/') 484 if err != nil { 485 return err 486 } 487 488 b, ok := jr.peekByte() 489 if !ok { 490 return nil 491 } 492 493 switch b { 494 case '/': 495 // handle single-line comments 496 return jr.skipLine() 497 498 case '*': 499 // handle (potentially) multi-line comments 500 return jr.skipGeneralComment() 501 502 default: 503 return jr.improveError(errInvalidComment) 504 } 505 } 506 507 // skipLine handles single-line comments for func skipComment 508 func (jr *jsonReader) skipLine() error { 509 for { 510 b, err := jr.readByte() 511 if err == io.EOF { 512 // end of input is fine in this case 513 return nil 514 } 515 if err != nil { 516 return err 517 } 518 519 if b == '\n' { 520 return nil 521 } 522 } 523 } 524 525 // skipGeneralComment handles (potentially) multi-line comments for func 526 // skipComment 527 func (jr *jsonReader) skipGeneralComment() error { 528 var prev byte 529 for { 530 b, err := jr.readByte() 531 if err != nil { 532 return jr.improveError(errCommentEarlyEnd) 533 } 534 535 if prev == '*' && b == '/' { 536 return nil 537 } 538 if b == '\n' { 539 jr.line++ 540 } 541 prev = b 542 } 543 } 544 545 // skipUTF8BOM does what it says, if a UTF-8 BOM is present 546 func (jr *jsonReader) skipUTF8BOM() { 547 lead, err := jr.r.Peek(3) 548 if err == nil && bytes.HasPrefix(lead, []byte{0xef, 0xbb, 0xbf}) { 549 jr.readByte() 550 jr.readByte() 551 jr.readByte() 552 jr.pos += 3 553 } 554 } 555 556 // outputByte is a small wrapper on func WriteByte, which adapts any error 557 // into a custom dummy output-error, which is in turn meant to be ignored, 558 // being just an excuse to quit the app immediately and successfully 559 func outputByte(w *bufio.Writer, b byte) error { 560 err := w.WriteByte(b) 561 if err == nil { 562 return nil 563 } 564 return errNoMoreOutput 565 } 566 567 // handleArray handles arrays for func handleValue 568 func handleArray(w *bufio.Writer, jr *jsonReader) error { 569 if err := jr.demandSyntax('['); err != nil { 570 return err 571 } 572 w.WriteByte('[') 573 574 for n := 0; true; n++ { 575 // there may be whitespace/comments before the next comma 576 if err := jr.seekNext(); err != nil { 577 return err 578 } 579 580 // handle commas between values, as well as trailing ones 581 comma := false 582 b, _ := jr.peekByte() 583 if b == ',' { 584 jr.readByte() 585 comma = true 586 587 // there may be whitespace/comments before an ending ']' 588 if err := jr.seekNext(); err != nil { 589 return err 590 } 591 b, _ = jr.peekByte() 592 } 593 594 // handle end of array 595 if b == ']' { 596 jr.readByte() 597 w.WriteByte(']') 598 return nil 599 } 600 601 // don't forget commas between adjacent values 602 if n > 0 { 603 if !comma { 604 return errNoArrayComma 605 } 606 if err := outputByte(w, ','); err != nil { 607 return err 608 } 609 } 610 611 // handle the next value 612 if err := jr.seekNext(); err != nil { 613 return err 614 } 615 if err := handleValue(w, jr); err != nil { 616 return err 617 } 618 } 619 620 // make the compiler happy 621 return nil 622 } 623 624 // handleDigits helps various number-handling funcs do their job 625 func handleDigits(w *bufio.Writer, jr *jsonReader) error { 626 for n := 0; true; n++ { 627 b, _ := jr.peekByte() 628 629 // support `nice` long numbers by ignoring their underscores 630 if b == '_' { 631 jr.readByte() 632 continue 633 } 634 635 if '0' <= b && b <= '9' { 636 jr.readByte() 637 w.WriteByte(b) 638 continue 639 } 640 641 if n == 0 { 642 return errNoDigits 643 } 644 return nil 645 } 646 647 // make the compiler happy 648 return nil 649 } 650 651 // handleDot handles pseudo-JSON numbers which start with a decimal dot 652 func handleDot(w *bufio.Writer, jr *jsonReader) error { 653 if err := jr.demandSyntax('.'); err != nil { 654 return err 655 } 656 w.Write([]byte{'0', '.'}) 657 return handleDigits(w, jr) 658 } 659 660 // handleKey is used by func handleObjects and generalizes func handleString, 661 // by allowing unquoted object keys; it's not used anywhere else, as allowing 662 // unquoted string values is ambiguous with actual JSON-keyword values null, 663 // false, and true. 664 func handleKey(w *bufio.Writer, jr *jsonReader) error { 665 quote, ok := jr.peekByte() 666 if !ok { 667 return jr.improveError(errStringEarlyEnd) 668 } 669 670 if quote == '"' || quote == '\'' { 671 return handleString(w, jr) 672 } 673 674 w.WriteByte('"') 675 for { 676 if b, _ := jr.peekByte(); isIdentifier[b] { 677 jr.readByte() 678 w.WriteByte(b) 679 continue 680 } 681 682 w.WriteByte('"') 683 return nil 684 } 685 } 686 687 // trySimpleInner tries to handle (more quickly) inner-strings where all bytes 688 // are unescaped ASCII symbols: this is a very common case for strings, and is 689 // almost always the case for object keys; returns whether it succeeded, so 690 // this func's caller knows knows if it needs to do anything, the slower way 691 func trySimpleInner(w *bufio.Writer, jr *jsonReader, quote byte) (gotIt bool) { 692 chunk, _ := jr.r.Peek(64) 693 694 for i, b := range chunk { 695 if b < 32 || b > 127 || b == '\\' { 696 return false 697 } 698 if b != quote { 699 continue 700 } 701 702 // bulk-writing the chunk is this func's whole point 703 w.WriteByte('"') 704 w.Write(chunk[:i]) 705 w.WriteByte('"') 706 707 jr.r.Discard(i + 1) 708 return true 709 } 710 711 // maybe the inner-string is ok, but it's just longer than the chunk 712 return false 713 } 714 715 // handleKeyword is used by funcs handleFalse, handleNull, and handleTrue 716 func handleKeyword(w *bufio.Writer, jr *jsonReader, kw []byte) error { 717 for rest := kw; len(rest) > 0; rest = rest[1:] { 718 b, err := jr.readByte() 719 if err == nil && b == rest[0] { 720 // keywords given to this func have no line-feeds 721 jr.pos++ 722 continue 723 } 724 725 msg := `expected JSON value ` + string(kw) 726 return jr.improveError(errors.New(msg)) 727 } 728 729 w.Write(kw) 730 return nil 731 } 732 733 // handleNegative handles numbers starting with a negative sign for func 734 // handleValue 735 func handleNegative(w *bufio.Writer, jr *jsonReader) error { 736 if err := jr.demandSyntax('-'); err != nil { 737 return err 738 } 739 740 w.WriteByte('-') 741 if b, _ := jr.peekByte(); b == '.' { 742 jr.readByte() 743 w.Write([]byte{'0', '.'}) 744 return handleDigits(w, jr) 745 } 746 return handleNumber(w, jr) 747 } 748 749 // handleNumber handles numeric values/tokens, including invalid-JSON cases, 750 // such as values starting with a decimal dot 751 func handleNumber(w *bufio.Writer, jr *jsonReader) error { 752 // handle integer digits 753 if err := handleDigits(w, jr); err != nil { 754 return err 755 } 756 757 // handle optional decimal digits, starting with a leading dot 758 if b, _ := jr.peekByte(); b == '.' { 759 jr.readByte() 760 w.WriteByte('.') 761 return handleDigits(w, jr) 762 } 763 764 // handle optional exponent digits 765 if b, _ := jr.peekByte(); b == 'e' || b == 'E' { 766 jr.readByte() 767 w.WriteByte(b) 768 b, _ = jr.peekByte() 769 if b == '+' { 770 jr.readByte() 771 } else if b == '-' { 772 w.WriteByte('-') 773 jr.readByte() 774 } 775 return handleDigits(w, jr) 776 } 777 778 return nil 779 } 780 781 // handleObject handles objects for func handleValue 782 func handleObject(w *bufio.Writer, jr *jsonReader) error { 783 if err := jr.demandSyntax('{'); err != nil { 784 return err 785 } 786 w.WriteByte('{') 787 788 for npairs := 0; true; npairs++ { 789 // there may be whitespace/comments before the next comma 790 if err := jr.seekNext(); err != nil { 791 return err 792 } 793 794 // handle commas between key-value pairs, as well as trailing ones 795 comma := false 796 b, _ := jr.peekByte() 797 if b == ',' { 798 jr.readByte() 799 comma = true 800 801 // there may be whitespace/comments before an ending '}' 802 if err := jr.seekNext(); err != nil { 803 return err 804 } 805 b, _ = jr.peekByte() 806 } 807 808 // handle end of object 809 if b == '}' { 810 jr.readByte() 811 w.WriteByte('}') 812 return nil 813 } 814 815 // don't forget commas between adjacent key-value pairs 816 if npairs > 0 { 817 if !comma { 818 return errNoObjectComma 819 } 820 if err := outputByte(w, ','); err != nil { 821 return err 822 } 823 } 824 825 // handle the next pair's key 826 if err := jr.seekNext(); err != nil { 827 return err 828 } 829 if err := handleKey(w, jr); err != nil { 830 return err 831 } 832 833 // demand a colon right after the key 834 if err := jr.seekNext(); err != nil { 835 return err 836 } 837 if err := jr.demandSyntax(':'); err != nil { 838 return err 839 } 840 w.WriteByte(':') 841 842 // handle the next pair's value 843 if err := jr.seekNext(); err != nil { 844 return err 845 } 846 if err := handleValue(w, jr); err != nil { 847 return err 848 } 849 } 850 851 // make the compiler happy 852 return nil 853 } 854 855 // handlePositive handles numbers starting with a positive sign for func 856 // handleValue 857 func handlePositive(w *bufio.Writer, jr *jsonReader) error { 858 if err := jr.demandSyntax('+'); err != nil { 859 return err 860 } 861 862 // valid JSON isn't supposed to have leading pluses on numbers, so 863 // emit nothing for it, unlike for negative numbers 864 865 if b, _ := jr.peekByte(); b == '.' { 866 jr.readByte() 867 w.Write([]byte{'0', '.'}) 868 return handleDigits(w, jr) 869 } 870 return handleNumber(w, jr) 871 } 872 873 // handleString handles strings for funcs handleValue and handleObject, and 874 // supports both single-quotes and double-quotes, always emitting the latter 875 // in the output, of course 876 func handleString(w *bufio.Writer, jr *jsonReader) error { 877 q, ok := jr.peekByte() 878 if !ok || (q != '"' && q != '\'') { 879 return errNoStringQuote 880 } 881 882 jr.readByte() 883 quote := rune(q) 884 885 // try the quicker all-unescaped-ASCII handler 886 if trySimpleInner(w, jr, q) { 887 return nil 888 } 889 890 // it's a non-trivial inner-string, so handle it byte-by-byte 891 w.WriteByte('"') 892 escaped := false 893 894 for { 895 r, err := jr.readRune() 896 if r == unicode.ReplacementChar { 897 return jr.improveError(errInvalidRune) 898 } 899 if err != nil { 900 if err == io.EOF { 901 return jr.improveError(errStringEarlyEnd) 902 } 903 return jr.improveError(err) 904 } 905 906 if !escaped { 907 if r == '\\' { 908 escaped = true 909 continue 910 } 911 912 // handle end of string 913 if r == quote { 914 return outputByte(w, '"') 915 } 916 917 if r <= 127 { 918 w.Write(escapedStringBytes[byte(r)]) 919 } else { 920 w.WriteRune(r) 921 } 922 continue 923 } 924 925 // handle escaped items 926 escaped = false 927 928 switch r { 929 case 'u': 930 // \u needs exactly 4 hex-digits to follow it 931 w.Write([]byte{'\\', 'u'}) 932 if err := copyHex(w, 4, jr); err != nil { 933 return jr.improveError(err) 934 } 935 936 case 'x': 937 // JSON only supports 4 escaped hex-digits, so pad the 2 938 // expected hex-digits with 2 zeros 939 w.Write([]byte{'\\', 'u', '0', '0'}) 940 if err := copyHex(w, 2, jr); err != nil { 941 return jr.improveError(err) 942 } 943 944 case 't', 'f', 'r', 'n', 'b', '\\', '"': 945 // handle valid-JSON escaped string sequences 946 w.WriteByte('\\') 947 w.WriteByte(byte(r)) 948 949 case '\'': 950 // escaped single-quotes aren't standard JSON, but they can 951 // be handy when the input uses non-standard single-quoted 952 // strings 953 w.WriteByte('\'') 954 955 default: 956 if r <= 127 { 957 w.Write(escapedStringBytes[byte(r)]) 958 } else { 959 w.WriteRune(r) 960 } 961 } 962 } 963 } 964 965 // copyHex handles a run of hex-digits for func handleString, starting right 966 // after the leading `\u` (or `\x`) part; this func doesn't `improve` its 967 // errors with position info: that's up to the caller 968 func copyHex(w *bufio.Writer, n int, jr *jsonReader) error { 969 for i := 0; i < n; i++ { 970 b, err := jr.readByte() 971 if err == io.EOF { 972 return errStringEarlyEnd 973 } 974 if err != nil { 975 return err 976 } 977 978 if b >= 128 { 979 return errInvalidHex 980 } 981 982 if b := matchHex[b]; b != 0 { 983 w.WriteByte(b) 984 continue 985 } 986 987 return errInvalidHex 988 } 989 990 return nil 991 } 992 993 // handleValue is a generic JSON-token handler, which allows the recursive 994 // behavior to handle any kind of JSON/pseudo-JSON input 995 func handleValue(w *bufio.Writer, jr *jsonReader) error { 996 chunk, err := jr.r.Peek(1) 997 if err == nil && len(chunk) >= 1 { 998 return handleValueDispatch(w, jr, chunk[0]) 999 } 1000 1001 if err == io.EOF { 1002 return jr.improveError(errInputEarlyEnd) 1003 } 1004 return jr.improveError(errInputEarlyEnd) 1005 } 1006 1007 // handleValueDispatch simplifies control-flow for func handleValue 1008 func handleValueDispatch(w *bufio.Writer, jr *jsonReader, b byte) error { 1009 switch b { 1010 case 'f': 1011 return handleKeyword(w, jr, []byte{'f', 'a', 'l', 's', 'e'}) 1012 case 'n': 1013 return handleKeyword(w, jr, []byte{'n', 'u', 'l', 'l'}) 1014 case 't': 1015 return handleKeyword(w, jr, []byte{'t', 'r', 'u', 'e'}) 1016 case 'F': 1017 return handleKeyword(w, jr, []byte{'F', 'a', 'l', 's', 'e'}) 1018 case 'N': 1019 return handleKeyword(w, jr, []byte{'N', 'o', 'n', 'e'}) 1020 case 'T': 1021 return handleKeyword(w, jr, []byte{'T', 'r', 'u', 'e'}) 1022 case '.': 1023 return handleDot(w, jr) 1024 case '+': 1025 return handlePositive(w, jr) 1026 case '-': 1027 return handleNegative(w, jr) 1028 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 1029 return handleNumber(w, jr) 1030 case '\'', '"': 1031 return handleString(w, jr) 1032 case '[': 1033 return handleArray(w, jr) 1034 case '{': 1035 return handleObject(w, jr) 1036 default: 1037 return jr.improveError(errInvalidToken) 1038 } 1039 } 1040 1041 // escapedStringBytes helps func handleString treat all string bytes quickly 1042 // and correctly, using their officially-supported JSON escape sequences 1043 // 1044 // https://www.rfc-editor.org/rfc/rfc8259#section-7 1045 var escapedStringBytes = [256][]byte{ 1046 {'\\', 'u', '0', '0', '0', '0'}, {'\\', 'u', '0', '0', '0', '1'}, 1047 {'\\', 'u', '0', '0', '0', '2'}, {'\\', 'u', '0', '0', '0', '3'}, 1048 {'\\', 'u', '0', '0', '0', '4'}, {'\\', 'u', '0', '0', '0', '5'}, 1049 {'\\', 'u', '0', '0', '0', '6'}, {'\\', 'u', '0', '0', '0', '7'}, 1050 {'\\', 'b'}, {'\\', 't'}, 1051 {'\\', 'n'}, {'\\', 'u', '0', '0', '0', 'b'}, 1052 {'\\', 'f'}, {'\\', 'r'}, 1053 {'\\', 'u', '0', '0', '0', 'e'}, {'\\', 'u', '0', '0', '0', 'f'}, 1054 {'\\', 'u', '0', '0', '1', '0'}, {'\\', 'u', '0', '0', '1', '1'}, 1055 {'\\', 'u', '0', '0', '1', '2'}, {'\\', 'u', '0', '0', '1', '3'}, 1056 {'\\', 'u', '0', '0', '1', '4'}, {'\\', 'u', '0', '0', '1', '5'}, 1057 {'\\', 'u', '0', '0', '1', '6'}, {'\\', 'u', '0', '0', '1', '7'}, 1058 {'\\', 'u', '0', '0', '1', '8'}, {'\\', 'u', '0', '0', '1', '9'}, 1059 {'\\', 'u', '0', '0', '1', 'a'}, {'\\', 'u', '0', '0', '1', 'b'}, 1060 {'\\', 'u', '0', '0', '1', 'c'}, {'\\', 'u', '0', '0', '1', 'd'}, 1061 {'\\', 'u', '0', '0', '1', 'e'}, {'\\', 'u', '0', '0', '1', 'f'}, 1062 {32}, {33}, {'\\', '"'}, {35}, {36}, {37}, {38}, {39}, 1063 {40}, {41}, {42}, {43}, {44}, {45}, {46}, {47}, 1064 {48}, {49}, {50}, {51}, {52}, {53}, {54}, {55}, 1065 {56}, {57}, {58}, {59}, {60}, {61}, {62}, {63}, 1066 {64}, {65}, {66}, {67}, {68}, {69}, {70}, {71}, 1067 {72}, {73}, {74}, {75}, {76}, {77}, {78}, {79}, 1068 {80}, {81}, {82}, {83}, {84}, {85}, {86}, {87}, 1069 {88}, {89}, {90}, {91}, {'\\', '\\'}, {93}, {94}, {95}, 1070 {96}, {97}, {98}, {99}, {100}, {101}, {102}, {103}, 1071 {104}, {105}, {106}, {107}, {108}, {109}, {110}, {111}, 1072 {112}, {113}, {114}, {115}, {116}, {117}, {118}, {119}, 1073 {120}, {121}, {122}, {123}, {124}, {125}, {126}, {127}, 1074 {128}, {129}, {130}, {131}, {132}, {133}, {134}, {135}, 1075 {136}, {137}, {138}, {139}, {140}, {141}, {142}, {143}, 1076 {144}, {145}, {146}, {147}, {148}, {149}, {150}, {151}, 1077 {152}, {153}, {154}, {155}, {156}, {157}, {158}, {159}, 1078 {160}, {161}, {162}, {163}, {164}, {165}, {166}, {167}, 1079 {168}, {169}, {170}, {171}, {172}, {173}, {174}, {175}, 1080 {176}, {177}, {178}, {179}, {180}, {181}, {182}, {183}, 1081 {184}, {185}, {186}, {187}, {188}, {189}, {190}, {191}, 1082 {192}, {193}, {194}, {195}, {196}, {197}, {198}, {199}, 1083 {200}, {201}, {202}, {203}, {204}, {205}, {206}, {207}, 1084 {208}, {209}, {210}, {211}, {212}, {213}, {214}, {215}, 1085 {216}, {217}, {218}, {219}, {220}, {221}, {222}, {223}, 1086 {224}, {225}, {226}, {227}, {228}, {229}, {230}, {231}, 1087 {232}, {233}, {234}, {235}, {236}, {237}, {238}, {239}, 1088 {240}, {241}, {242}, {243}, {244}, {245}, {246}, {247}, 1089 {248}, {249}, {250}, {251}, {252}, {253}, {254}, {255}, 1090 }