// json_reader.cpp revision f59fb0e83fd0a4b41700d3f5eebdc8d21b173c2e
1// Copyright 2007-2011 Baptiste Lepilleur 2// Distributed under MIT license, or public domain if desired and 3// recognized in your jurisdiction. 4// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 6#if !defined(JSON_IS_AMALGAMATION) 7# include <json/assertions.h> 8# include <json/reader.h> 9# include <json/value.h> 10# include "json_tool.h" 11#endif // if !defined(JSON_IS_AMALGAMATION) 12#include <utility> 13#include <cstdio> 14#include <cassert> 15#include <cstring> 16#include <stdexcept> 17 18#if _MSC_VER >= 1400 // VC++ 8.0 19#pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. 20#endif 21 22namespace Json { 23 24// Implementation of class Features 25// //////////////////////////////// 26 27Features::Features() 28 : allowComments_( true ) 29 , strictRoot_( false ) 30{ 31} 32 33 34Features 35Features::all() 36{ 37 return Features(); 38} 39 40 41Features 42Features::strictMode() 43{ 44 Features features; 45 features.allowComments_ = false; 46 features.strictRoot_ = true; 47 return features; 48} 49 50// Implementation of class Reader 51// //////////////////////////////// 52 53 54static inline bool 55in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) 56{ 57 return c == c1 || c == c2 || c == c3 || c == c4; 58} 59 60static inline bool 61in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) 62{ 63 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; 64} 65 66 67static bool 68containsNewLine( Reader::Location begin, 69 Reader::Location end ) 70{ 71 for ( ;begin < end; ++begin ) 72 if ( *begin == '\n' || *begin == '\r' ) 73 return true; 74 return false; 75} 76 77 78// Class Reader 79// ////////////////////////////////////////////////////////////////// 80 81Reader::Reader() 82 : errors_(), 83 document_(), 84 begin_(), 85 end_(), 86 current_(), 87 lastValueEnd_(), 88 lastValue_(), 89 commentsBefore_(), 90 
features_( Features::all() ), 91 collectComments_() 92{ 93} 94 95 96Reader::Reader( const Features &features ) 97 : errors_(), 98 document_(), 99 begin_(), 100 end_(), 101 current_(), 102 lastValueEnd_(), 103 lastValue_(), 104 commentsBefore_(), 105 features_( features ), 106 collectComments_() 107{ 108} 109 110 111bool 112Reader::parse( const std::string &document, 113 Value &root, 114 bool collectComments ) 115{ 116 document_ = document; 117 const char *begin = document_.c_str(); 118 const char *end = begin + document_.length(); 119 return parse( begin, end, root, collectComments ); 120} 121 122 123bool 124Reader::parse( std::istream& sin, 125 Value &root, 126 bool collectComments ) 127{ 128 //std::istream_iterator<char> begin(sin); 129 //std::istream_iterator<char> end; 130 // Those would allow streamed input from a file, if parse() were a 131 // template function. 132 133 // Since std::string is reference-counted, this at least does not 134 // create an extra copy. 135 std::string doc; 136 std::getline(sin, doc, (char)EOF); 137 return parse( doc, root, collectComments ); 138} 139 140bool 141Reader::parse( const char *beginDoc, const char *endDoc, 142 Value &root, 143 bool collectComments ) 144{ 145 if ( !features_.allowComments_ ) 146 { 147 collectComments = false; 148 } 149 150 begin_ = beginDoc; 151 end_ = endDoc; 152 collectComments_ = collectComments; 153 current_ = begin_; 154 lastValueEnd_ = 0; 155 lastValue_ = 0; 156 commentsBefore_ = ""; 157 errors_.clear(); 158 while ( !nodes_.empty() ) 159 nodes_.pop(); 160 nodes_.push( &root ); 161 162 bool successful = readValue(); 163 Token token; 164 skipCommentTokens( token ); 165 if ( collectComments_ && !commentsBefore_.empty() ) 166 root.setComment( commentsBefore_, commentAfter ); 167 if ( features_.strictRoot_ ) 168 { 169 if ( !root.isArray() && !root.isObject() ) 170 { 171 // Set error location to start of doc, ideally should be first token found in doc 172 token.type_ = tokenError; 173 token.start_ = 
beginDoc; 174 token.end_ = endDoc; 175 addError( "A valid JSON document must be either an array or an object value.", 176 token ); 177 return false; 178 } 179 } 180 return successful; 181} 182 183 184bool 185Reader::readValue() 186{ 187 Token token; 188 skipCommentTokens( token ); 189 bool successful = true; 190 191 if ( collectComments_ && !commentsBefore_.empty() ) 192 { 193 currentValue().setComment( commentsBefore_, commentBefore ); 194 commentsBefore_ = ""; 195 } 196 197 198 switch ( token.type_ ) 199 { 200 case tokenObjectBegin: 201 successful = readObject( token ); 202 break; 203 case tokenArrayBegin: 204 successful = readArray( token ); 205 break; 206 case tokenNumber: 207 successful = decodeNumber( token ); 208 break; 209 case tokenString: 210 successful = decodeString( token ); 211 break; 212 case tokenTrue: 213 currentValue() = true; 214 break; 215 case tokenFalse: 216 currentValue() = false; 217 break; 218 case tokenNull: 219 currentValue() = Value(); 220 break; 221 default: 222 return addError( "Syntax error: value, object or array expected.", token ); 223 } 224 225 if ( collectComments_ ) 226 { 227 lastValueEnd_ = current_; 228 lastValue_ = ¤tValue(); 229 } 230 231 return successful; 232} 233 234 235void 236Reader::skipCommentTokens( Token &token ) 237{ 238 if ( features_.allowComments_ ) 239 { 240 do 241 { 242 readToken( token ); 243 } 244 while ( token.type_ == tokenComment ); 245 } 246 else 247 { 248 readToken( token ); 249 } 250} 251 252 253bool 254Reader::expectToken( TokenType type, Token &token, const char *message ) 255{ 256 readToken( token ); 257 if ( token.type_ != type ) 258 return addError( message, token ); 259 return true; 260} 261 262 263bool 264Reader::readToken( Token &token ) 265{ 266 skipSpaces(); 267 token.start_ = current_; 268 Char c = getNextChar(); 269 bool ok = true; 270 switch ( c ) 271 { 272 case '{': 273 token.type_ = tokenObjectBegin; 274 break; 275 case '}': 276 token.type_ = tokenObjectEnd; 277 break; 278 case '[': 279 
token.type_ = tokenArrayBegin; 280 break; 281 case ']': 282 token.type_ = tokenArrayEnd; 283 break; 284 case '"': 285 token.type_ = tokenString; 286 ok = readString(); 287 break; 288 case '/': 289 token.type_ = tokenComment; 290 ok = readComment(); 291 break; 292 case '0': 293 case '1': 294 case '2': 295 case '3': 296 case '4': 297 case '5': 298 case '6': 299 case '7': 300 case '8': 301 case '9': 302 case '-': 303 token.type_ = tokenNumber; 304 readNumber(); 305 break; 306 case 't': 307 token.type_ = tokenTrue; 308 ok = match( "rue", 3 ); 309 break; 310 case 'f': 311 token.type_ = tokenFalse; 312 ok = match( "alse", 4 ); 313 break; 314 case 'n': 315 token.type_ = tokenNull; 316 ok = match( "ull", 3 ); 317 break; 318 case ',': 319 token.type_ = tokenArraySeparator; 320 break; 321 case ':': 322 token.type_ = tokenMemberSeparator; 323 break; 324 case 0: 325 token.type_ = tokenEndOfStream; 326 break; 327 default: 328 ok = false; 329 break; 330 } 331 if ( !ok ) 332 token.type_ = tokenError; 333 token.end_ = current_; 334 return true; 335} 336 337 338void 339Reader::skipSpaces() 340{ 341 while ( current_ != end_ ) 342 { 343 Char c = *current_; 344 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) 345 ++current_; 346 else 347 break; 348 } 349} 350 351 352bool 353Reader::match( Location pattern, 354 int patternLength ) 355{ 356 if ( end_ - current_ < patternLength ) 357 return false; 358 int index = patternLength; 359 while ( index-- ) 360 if ( current_[index] != pattern[index] ) 361 return false; 362 current_ += patternLength; 363 return true; 364} 365 366 367bool 368Reader::readComment() 369{ 370 Location commentBegin = current_ - 1; 371 Char c = getNextChar(); 372 bool successful = false; 373 if ( c == '*' ) 374 successful = readCStyleComment(); 375 else if ( c == '/' ) 376 successful = readCppStyleComment(); 377 if ( !successful ) 378 return false; 379 380 if ( collectComments_ ) 381 { 382 CommentPlacement placement = commentBefore; 383 if ( lastValueEnd_ && 
!containsNewLine( lastValueEnd_, commentBegin ) ) 384 { 385 if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) 386 placement = commentAfterOnSameLine; 387 } 388 389 addComment( commentBegin, current_, placement ); 390 } 391 return true; 392} 393 394 395void 396Reader::addComment( Location begin, 397 Location end, 398 CommentPlacement placement ) 399{ 400 assert( collectComments_ ); 401 if ( placement == commentAfterOnSameLine ) 402 { 403 assert( lastValue_ != 0 ); 404 lastValue_->setComment( std::string( begin, end ), placement ); 405 } 406 else 407 { 408 if ( !commentsBefore_.empty() ) 409 commentsBefore_ += "\n"; 410 commentsBefore_ += std::string( begin, end ); 411 } 412} 413 414 415bool 416Reader::readCStyleComment() 417{ 418 while ( current_ != end_ ) 419 { 420 Char c = getNextChar(); 421 if ( c == '*' && *current_ == '/' ) 422 break; 423 } 424 return getNextChar() == '/'; 425} 426 427 428bool 429Reader::readCppStyleComment() 430{ 431 while ( current_ != end_ ) 432 { 433 Char c = getNextChar(); 434 if ( c == '\r' || c == '\n' ) 435 break; 436 } 437 return true; 438} 439 440 441void 442Reader::readNumber() 443{ 444 while ( current_ != end_ ) 445 { 446 if ( !(*current_ >= '0' && *current_ <= '9') && 447 !in( *current_, '.', 'e', 'E', '+', '-' ) ) 448 break; 449 ++current_; 450 } 451} 452 453bool 454Reader::readString() 455{ 456 Char c = 0; 457 while ( current_ != end_ ) 458 { 459 c = getNextChar(); 460 if ( c == '\\' ) 461 getNextChar(); 462 else if ( c == '"' ) 463 break; 464 } 465 return c == '"'; 466} 467 468 469bool 470Reader::readObject( Token &/*tokenStart*/ ) 471{ 472 Token tokenName; 473 std::string name; 474 currentValue() = Value( objectValue ); 475 while ( readToken( tokenName ) ) 476 { 477 bool initialTokenOk = true; 478 while ( tokenName.type_ == tokenComment && initialTokenOk ) 479 initialTokenOk = readToken( tokenName ); 480 if ( !initialTokenOk ) 481 break; 482 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object 
483 return true; 484 if ( tokenName.type_ != tokenString ) 485 break; 486 487 name = ""; 488 if ( !decodeString( tokenName, name ) ) 489 return recoverFromError( tokenObjectEnd ); 490 491 Token colon; 492 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) 493 { 494 return addErrorAndRecover( "Missing ':' after object member name", 495 colon, 496 tokenObjectEnd ); 497 } 498 Value &value = currentValue()[ name ]; 499 nodes_.push( &value ); 500 bool ok = readValue(); 501 nodes_.pop(); 502 if ( !ok ) // error already set 503 return recoverFromError( tokenObjectEnd ); 504 505 Token comma; 506 if ( !readToken( comma ) 507 || ( comma.type_ != tokenObjectEnd && 508 comma.type_ != tokenArraySeparator && 509 comma.type_ != tokenComment ) ) 510 { 511 return addErrorAndRecover( "Missing ',' or '}' in object declaration", 512 comma, 513 tokenObjectEnd ); 514 } 515 bool finalizeTokenOk = true; 516 while ( comma.type_ == tokenComment && 517 finalizeTokenOk ) 518 finalizeTokenOk = readToken( comma ); 519 if ( comma.type_ == tokenObjectEnd ) 520 return true; 521 } 522 return addErrorAndRecover( "Missing '}' or object member name", 523 tokenName, 524 tokenObjectEnd ); 525} 526 527 528bool 529Reader::readArray( Token &/*tokenStart*/ ) 530{ 531 currentValue() = Value( arrayValue ); 532 skipSpaces(); 533 if ( *current_ == ']' ) // empty array 534 { 535 Token endArray; 536 readToken( endArray ); 537 return true; 538 } 539 int index = 0; 540 for (;;) 541 { 542 Value &value = currentValue()[ index++ ]; 543 nodes_.push( &value ); 544 bool ok = readValue(); 545 nodes_.pop(); 546 if ( !ok ) // error already set 547 return recoverFromError( tokenArrayEnd ); 548 549 Token token; 550 // Accept Comment after last item in the array. 
551 ok = readToken( token ); 552 while ( token.type_ == tokenComment && ok ) 553 { 554 ok = readToken( token ); 555 } 556 bool badTokenType = ( token.type_ != tokenArraySeparator && 557 token.type_ != tokenArrayEnd ); 558 if ( !ok || badTokenType ) 559 { 560 return addErrorAndRecover( "Missing ',' or ']' in array declaration", 561 token, 562 tokenArrayEnd ); 563 } 564 if ( token.type_ == tokenArrayEnd ) 565 break; 566 } 567 return true; 568} 569 570 571bool 572Reader::decodeNumber( Token &token ) 573{ 574 bool isDouble = false; 575 for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) 576 { 577 isDouble = isDouble 578 || in( *inspect, '.', 'e', 'E', '+' ) 579 || ( *inspect == '-' && inspect != token.start_ ); 580 } 581 if ( isDouble ) 582 return decodeDouble( token ); 583 // Attempts to parse the number as an integer. If the number is 584 // larger than the maximum supported value of an integer then 585 // we decode the number as a double. 586 Location current = token.start_; 587 bool isNegative = *current == '-'; 588 if ( isNegative ) 589 ++current; 590 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) 591 : Value::maxLargestUInt; 592 Value::LargestUInt threshold = maxIntegerValue / 10; 593 Value::LargestUInt value = 0; 594 while ( current < token.end_ ) 595 { 596 Char c = *current++; 597 if ( c < '0' || c > '9' ) 598 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 599 Value::UInt digit(c - '0'); 600 if ( value >= threshold ) 601 { 602 // We've hit or exceeded the max value divided by 10 (rounded down). If 603 // a) we've only just touched the limit, b) this is the last digit, and 604 // c) it's small enough to fit in that rounding delta, we're okay. 605 // Otherwise treat this number as a double to avoid overflow. 
606 if (value > threshold || 607 current != token.end_ || 608 digit > maxIntegerValue % 10) 609 { 610 return decodeDouble( token ); 611 } 612 } 613 value = value * 10 + digit; 614 } 615 if ( isNegative ) 616 currentValue() = -Value::LargestInt( value ); 617 else if ( value <= Value::LargestUInt(Value::maxInt) ) 618 currentValue() = Value::LargestInt( value ); 619 else 620 currentValue() = value; 621 return true; 622} 623 624 625bool 626Reader::decodeDouble( Token &token ) 627{ 628 double value = 0; 629 const int bufferSize = 32; 630 int count; 631 int length = int(token.end_ - token.start_); 632 633 // Sanity check to avoid buffer overflow exploits. 634 if (length < 0) { 635 return addError( "Unable to parse token length", token ); 636 } 637 638 // Avoid using a string constant for the format control string given to 639 // sscanf, as this can cause hard to debug crashes on OS X. See here for more 640 // info: 641 // 642 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html 643 char format[] = "%lf"; 644 645 if ( length <= bufferSize ) 646 { 647 Char buffer[bufferSize+1]; 648 memcpy( buffer, token.start_, length ); 649 buffer[length] = 0; 650 count = sscanf( buffer, format, &value ); 651 } 652 else 653 { 654 std::string buffer( token.start_, token.end_ ); 655 count = sscanf( buffer.c_str(), format, &value ); 656 } 657 658 if ( count != 1 ) 659 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 660 currentValue() = value; 661 return true; 662} 663 664 665bool 666Reader::decodeString( Token &token ) 667{ 668 std::string decoded; 669 if ( !decodeString( token, decoded ) ) 670 return false; 671 currentValue() = decoded; 672 return true; 673} 674 675 676bool 677Reader::decodeString( Token &token, std::string &decoded ) 678{ 679 decoded.reserve( token.end_ - token.start_ - 2 ); 680 Location current = token.start_ + 1; // skip '"' 681 Location end = token.end_ - 1; // do not 
include '"' 682 while ( current != end ) 683 { 684 Char c = *current++; 685 if ( c == '"' ) 686 break; 687 else if ( c == '\\' ) 688 { 689 if ( current == end ) 690 return addError( "Empty escape sequence in string", token, current ); 691 Char escape = *current++; 692 switch ( escape ) 693 { 694 case '"': decoded += '"'; break; 695 case '/': decoded += '/'; break; 696 case '\\': decoded += '\\'; break; 697 case 'b': decoded += '\b'; break; 698 case 'f': decoded += '\f'; break; 699 case 'n': decoded += '\n'; break; 700 case 'r': decoded += '\r'; break; 701 case 't': decoded += '\t'; break; 702 case 'u': 703 { 704 unsigned int unicode; 705 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) 706 return false; 707 decoded += codePointToUTF8(unicode); 708 } 709 break; 710 default: 711 return addError( "Bad escape sequence in string", token, current ); 712 } 713 } 714 else 715 { 716 decoded += c; 717 } 718 } 719 return true; 720} 721 722bool 723Reader::decodeUnicodeCodePoint( Token &token, 724 Location ¤t, 725 Location end, 726 unsigned int &unicode ) 727{ 728 729 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) 730 return false; 731 if (unicode >= 0xD800 && unicode <= 0xDBFF) 732 { 733 // surrogate pairs 734 if (end - current < 6) 735 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); 736 unsigned int surrogatePair; 737 if (*(current++) == '\\' && *(current++)== 'u') 738 { 739 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) 740 { 741 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); 742 } 743 else 744 return false; 745 } 746 else 747 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); 748 } 749 return true; 750} 751 752bool 753Reader::decodeUnicodeEscapeSequence( Token &token, 754 Location ¤t, 755 Location end, 756 unsigned int &unicode ) 757{ 758 if ( end - current < 4 ) 759 
return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); 760 unicode = 0; 761 for ( int index =0; index < 4; ++index ) 762 { 763 Char c = *current++; 764 unicode *= 16; 765 if ( c >= '0' && c <= '9' ) 766 unicode += c - '0'; 767 else if ( c >= 'a' && c <= 'f' ) 768 unicode += c - 'a' + 10; 769 else if ( c >= 'A' && c <= 'F' ) 770 unicode += c - 'A' + 10; 771 else 772 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); 773 } 774 return true; 775} 776 777 778bool 779Reader::addError( const std::string &message, 780 Token &token, 781 Location extra ) 782{ 783 ErrorInfo info; 784 info.token_ = token; 785 info.message_ = message; 786 info.extra_ = extra; 787 errors_.push_back( info ); 788 return false; 789} 790 791 792bool 793Reader::recoverFromError( TokenType skipUntilToken ) 794{ 795 int errorCount = int(errors_.size()); 796 Token skip; 797 for (;;) 798 { 799 if ( !readToken(skip) ) 800 errors_.resize( errorCount ); // discard errors caused by recovery 801 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) 802 break; 803 } 804 errors_.resize( errorCount ); 805 return false; 806} 807 808 809bool 810Reader::addErrorAndRecover( const std::string &message, 811 Token &token, 812 TokenType skipUntilToken ) 813{ 814 addError( message, token ); 815 return recoverFromError( skipUntilToken ); 816} 817 818 819Value & 820Reader::currentValue() 821{ 822 return *(nodes_.top()); 823} 824 825 826Reader::Char 827Reader::getNextChar() 828{ 829 if ( current_ == end_ ) 830 return 0; 831 return *current_++; 832} 833 834 835void 836Reader::getLocationLineAndColumn( Location location, 837 int &line, 838 int &column ) const 839{ 840 Location current = begin_; 841 Location lastLineStart = current; 842 line = 0; 843 while ( current < location && current != end_ ) 844 { 845 Char c = *current++; 846 if ( c == '\r' ) 847 { 848 if ( *current == '\n' ) 849 ++current; 850 lastLineStart 
= current; 851 ++line; 852 } 853 else if ( c == '\n' ) 854 { 855 lastLineStart = current; 856 ++line; 857 } 858 } 859 // column & line start at 1 860 column = int(location - lastLineStart) + 1; 861 ++line; 862} 863 864 865std::string 866Reader::getLocationLineAndColumn( Location location ) const 867{ 868 int line, column; 869 getLocationLineAndColumn( location, line, column ); 870 char buffer[18+16+16+1]; 871 sprintf( buffer, "Line %d, Column %d", line, column ); 872 return buffer; 873} 874 875 876// Deprecated. Preserved for backward compatibility 877std::string 878Reader::getFormatedErrorMessages() const 879{ 880 return getFormattedErrorMessages(); 881} 882 883 884std::string 885Reader::getFormattedErrorMessages() const 886{ 887 std::string formattedMessage; 888 for ( Errors::const_iterator itError = errors_.begin(); 889 itError != errors_.end(); 890 ++itError ) 891 { 892 const ErrorInfo &error = *itError; 893 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; 894 formattedMessage += " " + error.message_ + "\n"; 895 if ( error.extra_ ) 896 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; 897 } 898 return formattedMessage; 899} 900 901 902std::istream& operator>>( std::istream &sin, Value &root ) 903{ 904 Json::Reader reader; 905 bool ok = reader.parse(sin, root, true); 906 if (!ok) { 907 fprintf( 908 stderr, 909 "Error from reader: %s", 910 reader.getFormattedErrorMessages().c_str()); 911 912 JSON_FAIL_MESSAGE("reader error"); 913 } 914 return sin; 915} 916 917 918} // namespace Json 919