1// Copyright 2007-2011 Baptiste Lepilleur 2// Distributed under MIT license, or public domain if desired and 3// recognized in your jurisdiction. 4// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 6#if !defined(JSON_IS_AMALGAMATION) 7# include <json/assertions.h> 8# include <json/reader.h> 9# include <json/value.h> 10# include "json_tool.h" 11#endif // if !defined(JSON_IS_AMALGAMATION) 12#include <utility> 13#include <cstdio> 14#include <cassert> 15#include <cstring> 16#include <stdexcept> 17#include <string> 18#include <istream> 19 20#if _MSC_VER >= 1400 // VC++ 8.0 21#pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. 22#endif 23 24namespace Json { 25 26// Implementation of class Features 27// //////////////////////////////// 28 29Features::Features() 30 : allowComments_( true ) 31 , strictRoot_( false ) 32{ 33} 34 35 36Features 37Features::all() 38{ 39 return Features(); 40} 41 42 43Features 44Features::strictMode() 45{ 46 Features features; 47 features.allowComments_ = false; 48 features.strictRoot_ = true; 49 return features; 50} 51 52// Implementation of class Reader 53// //////////////////////////////// 54 55 56static inline bool 57in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) 58{ 59 return c == c1 || c == c2 || c == c3 || c == c4; 60} 61 62static inline bool 63in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) 64{ 65 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; 66} 67 68 69static bool 70containsNewLine( Reader::Location begin, 71 Reader::Location end ) 72{ 73 for ( ;begin < end; ++begin ) 74 if ( *begin == '\n' || *begin == '\r' ) 75 return true; 76 return false; 77} 78 79 80// Class Reader 81// ////////////////////////////////////////////////////////////////// 82 83Reader::Reader() 84 : errors_(), 85 document_(), 86 begin_(), 87 end_(), 88 current_(), 89 lastValueEnd_(), 90 lastValue_(), 91 commentsBefore_(), 92 features_( Features::all() ), 93 collectComments_() 94{ 95} 96 97 98Reader::Reader( const Features &features ) 99 : errors_(), 100 document_(), 101 begin_(), 102 end_(), 103 current_(), 104 lastValueEnd_(), 105 lastValue_(), 106 commentsBefore_(), 107 features_( features ), 108 collectComments_() 109{ 110} 111 112 113bool 114Reader::parse( const std::string &document, 115 Value &root, 116 bool collectComments ) 117{ 118 document_ = document; 119 const char *begin = document_.c_str(); 120 const char *end = begin + document_.length(); 121 return parse( begin, end, root, collectComments ); 122} 123 124 125bool 126Reader::parse( std::istream& sin, 127 Value &root, 128 bool collectComments ) 129{ 130 //std::istream_iterator<char> begin(sin); 131 //std::istream_iterator<char> end; 132 // Those would allow streamed input from a file, if parse() were a 133 // template function. 134 135 // Since std::string is reference-counted, this at least does not 136 // create an extra copy. 137 std::string doc; 138 std::getline(sin, doc, (char)EOF); 139 return parse( doc, root, collectComments ); 140} 141 142bool 143Reader::parse( const char *beginDoc, const char *endDoc, 144 Value &root, 145 bool collectComments ) 146{ 147 if ( !features_.allowComments_ ) 148 { 149 collectComments = false; 150 } 151 152 begin_ = beginDoc; 153 end_ = endDoc; 154 collectComments_ = collectComments; 155 current_ = begin_; 156 lastValueEnd_ = 0; 157 lastValue_ = 0; 158 commentsBefore_ = ""; 159 errors_.clear(); 160 while ( !nodes_.empty() ) 161 nodes_.pop(); 162 nodes_.push( &root ); 163 164 bool successful = readValue(); 165 Token token; 166 skipCommentTokens( token ); 167 if ( collectComments_ && !commentsBefore_.empty() ) 168 root.setComment( commentsBefore_, commentAfter ); 169 if ( features_.strictRoot_ ) 170 { 171 if ( !root.isArray() && !root.isObject() ) 172 { 173 // Set error location to start of doc, ideally should be first token found in doc 174 token.type_ = tokenError; 175 token.start_ = beginDoc; 176 token.end_ = endDoc; 177 addError( "A valid JSON document must be either an array or an object value.", 178 token ); 179 return false; 180 } 181 } 182 return successful; 183} 184 185 186bool 187Reader::readValue() 188{ 189 Token token; 190 skipCommentTokens( token ); 191 bool successful = true; 192 193 if ( collectComments_ && !commentsBefore_.empty() ) 194 { 195 currentValue().setComment( commentsBefore_, commentBefore ); 196 commentsBefore_ = ""; 197 } 198 199 200 switch ( token.type_ ) 201 { 202 case tokenObjectBegin: 203 successful = readObject( token ); 204 break; 205 case tokenArrayBegin: 206 successful = readArray( token ); 207 break; 208 case tokenNumber: 209 successful = decodeNumber( token ); 210 break; 211 case tokenString: 212 successful = decodeString( token ); 213 break; 214 case tokenTrue: 215 currentValue() = true; 216 break; 217 case tokenFalse: 218 currentValue() = false; 219 break; 220 case tokenNull: 221 currentValue() = Value(); 222 break; 223 default: 224 return addError( "Syntax error: value, object or array expected.", token ); 225 } 226 227 if ( collectComments_ ) 228 { 229 lastValueEnd_ = current_; 230 lastValue_ = ¤tValue(); 231 } 232 233 return successful; 234} 235 236 237void 238Reader::skipCommentTokens( Token &token ) 239{ 240 if ( features_.allowComments_ ) 241 { 242 do 243 { 244 readToken( token ); 245 } 246 while ( token.type_ == tokenComment ); 247 } 248 else 249 { 250 readToken( token ); 251 } 252} 253 254 255bool 256Reader::expectToken( TokenType type, Token &token, const char *message ) 257{ 258 readToken( token ); 259 if ( token.type_ != type ) 260 return addError( message, token ); 261 return true; 262} 263 264 265bool 266Reader::readToken( Token &token ) 267{ 268 skipSpaces(); 269 token.start_ = current_; 270 Char c = getNextChar(); 271 bool ok = true; 272 switch ( c ) 273 { 274 case '{': 275 token.type_ = tokenObjectBegin; 276 break; 277 case '}': 278 token.type_ = tokenObjectEnd; 279 break; 280 case '[': 281 token.type_ = tokenArrayBegin; 282 break; 283 case ']': 284 token.type_ = tokenArrayEnd; 285 break; 286 case '"': 287 token.type_ = tokenString; 288 ok = readString(); 289 break; 290 case '/': 291 token.type_ = tokenComment; 292 ok = readComment(); 293 break; 294 case '0': 295 case '1': 296 case '2': 297 case '3': 298 case '4': 299 case '5': 300 case '6': 301 case '7': 302 case '8': 303 case '9': 304 case '-': 305 token.type_ = tokenNumber; 306 readNumber(); 307 break; 308 case 't': 309 token.type_ = tokenTrue; 310 ok = match( "rue", 3 ); 311 break; 312 case 'f': 313 token.type_ = tokenFalse; 314 ok = match( "alse", 4 ); 315 break; 316 case 'n': 317 token.type_ = tokenNull; 318 ok = match( "ull", 3 ); 319 break; 320 case ',': 321 token.type_ = tokenArraySeparator; 322 break; 323 case ':': 324 token.type_ = tokenMemberSeparator; 325 break; 326 case 0: 327 token.type_ = tokenEndOfStream; 328 break; 329 default: 330 ok = false; 331 break; 332 } 333 if ( !ok ) 334 token.type_ = tokenError; 335 token.end_ = current_; 336 return true; 337} 338 339 340void 341Reader::skipSpaces() 342{ 343 while ( current_ != end_ ) 344 { 345 Char c = *current_; 346 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) 347 ++current_; 348 else 349 break; 350 } 351} 352 353 354bool 355Reader::match( Location pattern, 356 int patternLength ) 357{ 358 if ( end_ - current_ < patternLength ) 359 return false; 360 int index = patternLength; 361 while ( index-- ) 362 if ( current_[index] != pattern[index] ) 363 return false; 364 current_ += patternLength; 365 return true; 366} 367 368 369bool 370Reader::readComment() 371{ 372 Location commentBegin = current_ - 1; 373 Char c = getNextChar(); 374 bool successful = false; 375 if ( c == '*' ) 376 successful = readCStyleComment(); 377 else if ( c == '/' ) 378 successful = readCppStyleComment(); 379 if ( !successful ) 380 return false; 381 382 if ( collectComments_ ) 383 { 384 CommentPlacement placement = commentBefore; 385 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) 386 { 387 if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) 388 placement = commentAfterOnSameLine; 389 } 390 391 addComment( commentBegin, current_, placement ); 392 } 393 return true; 394} 395 396 397void 398Reader::addComment( Location begin, 399 Location end, 400 CommentPlacement placement ) 401{ 402 assert( collectComments_ ); 403 if ( placement == commentAfterOnSameLine ) 404 { 405 assert( lastValue_ != 0 ); 406 lastValue_->setComment( std::string( begin, end ), placement ); 407 } 408 else 409 { 410 if ( !commentsBefore_.empty() ) 411 commentsBefore_ += "\n"; 412 commentsBefore_ += std::string( begin, end ); 413 } 414} 415 416 417bool 418Reader::readCStyleComment() 419{ 420 while ( current_ != end_ ) 421 { 422 Char c = getNextChar(); 423 if ( c == '*' && *current_ == '/' ) 424 break; 425 } 426 return getNextChar() == '/'; 427} 428 429 430bool 431Reader::readCppStyleComment() 432{ 433 while ( current_ != end_ ) 434 { 435 Char c = getNextChar(); 436 if ( c == '\r' || c == '\n' ) 437 break; 438 } 439 return true; 440} 441 442 443void 444Reader::readNumber() 445{ 446 while ( current_ != end_ ) 447 { 448 if ( !(*current_ >= '0' && *current_ <= '9') && 449 !in( *current_, '.', 'e', 'E', '+', '-' ) ) 450 break; 451 ++current_; 452 } 453} 454 455bool 456Reader::readString() 457{ 458 Char c = 0; 459 while ( current_ != end_ ) 460 { 461 c = getNextChar(); 462 if ( c == '\\' ) 463 getNextChar(); 464 else if ( c == '"' ) 465 break; 466 } 467 return c == '"'; 468} 469 470 471bool 472Reader::readObject( Token &/*tokenStart*/ ) 473{ 474 Token tokenName; 475 std::string name; 476 currentValue() = Value( objectValue ); 477 while ( readToken( tokenName ) ) 478 { 479 bool initialTokenOk = true; 480 while ( tokenName.type_ == tokenComment && initialTokenOk ) 481 initialTokenOk = readToken( tokenName ); 482 if ( !initialTokenOk ) 483 break; 484 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object 485 return true; 486 if ( tokenName.type_ != tokenString ) 487 break; 488 489 name = ""; 490 if ( !decodeString( tokenName, name ) ) 491 return recoverFromError( tokenObjectEnd ); 492 493 Token colon; 494 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) 495 { 496 return addErrorAndRecover( "Missing ':' after object member name", 497 colon, 498 tokenObjectEnd ); 499 } 500 Value &value = currentValue()[ name ]; 501 nodes_.push( &value ); 502 bool ok = readValue(); 503 nodes_.pop(); 504 if ( !ok ) // error already set 505 return recoverFromError( tokenObjectEnd ); 506 507 Token comma; 508 if ( !readToken( comma ) 509 || ( comma.type_ != tokenObjectEnd && 510 comma.type_ != tokenArraySeparator && 511 comma.type_ != tokenComment ) ) 512 { 513 return addErrorAndRecover( "Missing ',' or '}' in object declaration", 514 comma, 515 tokenObjectEnd ); 516 } 517 bool finalizeTokenOk = true; 518 while ( comma.type_ == tokenComment && 519 finalizeTokenOk ) 520 finalizeTokenOk = readToken( comma ); 521 if ( comma.type_ == tokenObjectEnd ) 522 return true; 523 } 524 return addErrorAndRecover( "Missing '}' or object member name", 525 tokenName, 526 tokenObjectEnd ); 527} 528 529 530bool 531Reader::readArray( Token &/*tokenStart*/ ) 532{ 533 currentValue() = Value( arrayValue ); 534 skipSpaces(); 535 if ( *current_ == ']' ) // empty array 536 { 537 Token endArray; 538 readToken( endArray ); 539 return true; 540 } 541 int index = 0; 542 for (;;) 543 { 544 Value &value = currentValue()[ index++ ]; 545 nodes_.push( &value ); 546 bool ok = readValue(); 547 nodes_.pop(); 548 if ( !ok ) // error already set 549 return recoverFromError( tokenArrayEnd ); 550 551 Token token; 552 // Accept Comment after last item in the array. 553 ok = readToken( token ); 554 while ( token.type_ == tokenComment && ok ) 555 { 556 ok = readToken( token ); 557 } 558 bool badTokenType = ( token.type_ != tokenArraySeparator && 559 token.type_ != tokenArrayEnd ); 560 if ( !ok || badTokenType ) 561 { 562 return addErrorAndRecover( "Missing ',' or ']' in array declaration", 563 token, 564 tokenArrayEnd ); 565 } 566 if ( token.type_ == tokenArrayEnd ) 567 break; 568 } 569 return true; 570} 571 572 573bool 574Reader::decodeNumber( Token &token ) 575{ 576 bool isDouble = false; 577 for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) 578 { 579 isDouble = isDouble 580 || in( *inspect, '.', 'e', 'E', '+' ) 581 || ( *inspect == '-' && inspect != token.start_ ); 582 } 583 if ( isDouble ) 584 return decodeDouble( token ); 585 // Attempts to parse the number as an integer. If the number is 586 // larger than the maximum supported value of an integer then 587 // we decode the number as a double. 588 Location current = token.start_; 589 bool isNegative = *current == '-'; 590 if ( isNegative ) 591 ++current; 592 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) 593 : Value::maxLargestUInt; 594 Value::LargestUInt threshold = maxIntegerValue / 10; 595 Value::LargestUInt value = 0; 596 while ( current < token.end_ ) 597 { 598 Char c = *current++; 599 if ( c < '0' || c > '9' ) 600 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 601 Value::UInt digit(c - '0'); 602 if ( value >= threshold ) 603 { 604 // We've hit or exceeded the max value divided by 10 (rounded down). If 605 // a) we've only just touched the limit, b) this is the last digit, and 606 // c) it's small enough to fit in that rounding delta, we're okay. 607 // Otherwise treat this number as a double to avoid overflow. 608 if (value > threshold || 609 current != token.end_ || 610 digit > maxIntegerValue % 10) 611 { 612 return decodeDouble( token ); 613 } 614 } 615 value = value * 10 + digit; 616 } 617 if ( isNegative ) 618 currentValue() = -Value::LargestInt( value ); 619 else if ( value <= Value::LargestUInt(Value::maxInt) ) 620 currentValue() = Value::LargestInt( value ); 621 else 622 currentValue() = value; 623 return true; 624} 625 626 627bool 628Reader::decodeDouble( Token &token ) 629{ 630 double value = 0; 631 const int bufferSize = 32; 632 int count; 633 int length = int(token.end_ - token.start_); 634 635 // Sanity check to avoid buffer overflow exploits. 636 if (length < 0) { 637 return addError( "Unable to parse token length", token ); 638 } 639 640 // Avoid using a string constant for the format control string given to 641 // sscanf, as this can cause hard to debug crashes on OS X. See here for more 642 // info: 643 // 644 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html 645 char format[] = "%lf"; 646 647 if ( length <= bufferSize ) 648 { 649 Char buffer[bufferSize+1]; 650 memcpy( buffer, token.start_, length ); 651 buffer[length] = 0; 652 count = sscanf( buffer, format, &value ); 653 } 654 else 655 { 656 std::string buffer( token.start_, token.end_ ); 657 count = sscanf( buffer.c_str(), format, &value ); 658 } 659 660 if ( count != 1 ) 661 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 662 currentValue() = value; 663 return true; 664} 665 666 667bool 668Reader::decodeString( Token &token ) 669{ 670 std::string decoded; 671 if ( !decodeString( token, decoded ) ) 672 return false; 673 currentValue() = decoded; 674 return true; 675} 676 677 678bool 679Reader::decodeString( Token &token, std::string &decoded ) 680{ 681 decoded.reserve( token.end_ - token.start_ - 2 ); 682 Location current = token.start_ + 1; // skip '"' 683 Location end = token.end_ - 1; // do not include '"' 684 while ( current != end ) 685 { 686 Char c = *current++; 687 if ( c == '"' ) 688 break; 689 else if ( c == '\\' ) 690 { 691 if ( current == end ) 692 return addError( "Empty escape sequence in string", token, current ); 693 Char escape = *current++; 694 switch ( escape ) 695 { 696 case '"': decoded += '"'; break; 697 case '/': decoded += '/'; break; 698 case '\\': decoded += '\\'; break; 699 case 'b': decoded += '\b'; break; 700 case 'f': decoded += '\f'; break; 701 case 'n': decoded += '\n'; break; 702 case 'r': decoded += '\r'; break; 703 case 't': decoded += '\t'; break; 704 case 'u': 705 { 706 unsigned int unicode; 707 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) 708 return false; 709 decoded += codePointToUTF8(unicode); 710 } 711 break; 712 default: 713 return addError( "Bad escape sequence in string", token, current ); 714 } 715 } 716 else 717 { 718 decoded += c; 719 } 720 } 721 return true; 722} 723 724bool 725Reader::decodeUnicodeCodePoint( Token &token, 726 Location ¤t, 727 Location end, 728 unsigned int &unicode ) 729{ 730 731 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) 732 return false; 733 if (unicode >= 0xD800 && unicode <= 0xDBFF) 734 { 735 // surrogate pairs 736 if (end - current < 6) 737 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); 738 unsigned int surrogatePair; 739 if (*(current++) == '\\' && *(current++)== 'u') 740 { 741 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) 742 { 743 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); 744 } 745 else 746 return false; 747 } 748 else 749 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); 750 } 751 return true; 752} 753 754bool 755Reader::decodeUnicodeEscapeSequence( Token &token, 756 Location ¤t, 757 Location end, 758 unsigned int &unicode ) 759{ 760 if ( end - current < 4 ) 761 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); 762 unicode = 0; 763 for ( int index =0; index < 4; ++index ) 764 { 765 Char c = *current++; 766 unicode *= 16; 767 if ( c >= '0' && c <= '9' ) 768 unicode += c - '0'; 769 else if ( c >= 'a' && c <= 'f' ) 770 unicode += c - 'a' + 10; 771 else if ( c >= 'A' && c <= 'F' ) 772 unicode += c - 'A' + 10; 773 else 774 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); 775 } 776 return true; 777} 778 779 780bool 781Reader::addError( const std::string &message, 782 Token &token, 783 Location extra ) 784{ 785 ErrorInfo info; 786 info.token_ = token; 787 info.message_ = message; 788 info.extra_ = extra; 789 errors_.push_back( info ); 790 return false; 791} 792 793 794bool 795Reader::recoverFromError( TokenType skipUntilToken ) 796{ 797 int errorCount = int(errors_.size()); 798 Token skip; 799 for (;;) 800 { 801 if ( !readToken(skip) ) 802 errors_.resize( errorCount ); // discard errors caused by recovery 803 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) 804 break; 805 } 806 errors_.resize( errorCount ); 807 return false; 808} 809 810 811bool 812Reader::addErrorAndRecover( const std::string &message, 813 Token &token, 814 TokenType skipUntilToken ) 815{ 816 addError( message, token ); 817 return recoverFromError( skipUntilToken ); 818} 819 820 821Value & 822Reader::currentValue() 823{ 824 return *(nodes_.top()); 825} 826 827 828Reader::Char 829Reader::getNextChar() 830{ 831 if ( current_ == end_ ) 832 return 0; 833 return *current_++; 834} 835 836 837void 838Reader::getLocationLineAndColumn( Location location, 839 int &line, 840 int &column ) const 841{ 842 Location current = begin_; 843 Location lastLineStart = current; 844 line = 0; 845 while ( current < location && current != end_ ) 846 { 847 Char c = *current++; 848 if ( c == '\r' ) 849 { 850 if ( *current == '\n' ) 851 ++current; 852 lastLineStart = current; 853 ++line; 854 } 855 else if ( c == '\n' ) 856 { 857 lastLineStart = current; 858 ++line; 859 } 860 } 861 // column & line start at 1 862 column = int(location - lastLineStart) + 1; 863 ++line; 864} 865 866 867std::string 868Reader::getLocationLineAndColumn( Location location ) const 869{ 870 int line, column; 871 getLocationLineAndColumn( location, line, column ); 872 char buffer[18+16+16+1]; 873 sprintf( buffer, "Line %d, Column %d", line, column ); 874 return buffer; 875} 876 877 878// Deprecated. Preserved for backward compatibility 879std::string 880Reader::getFormatedErrorMessages() const 881{ 882 return getFormattedErrorMessages(); 883} 884 885 886std::string 887Reader::getFormattedErrorMessages() const 888{ 889 std::string formattedMessage; 890 for ( Errors::const_iterator itError = errors_.begin(); 891 itError != errors_.end(); 892 ++itError ) 893 { 894 const ErrorInfo &error = *itError; 895 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; 896 formattedMessage += " " + error.message_ + "\n"; 897 if ( error.extra_ ) 898 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; 899 } 900 return formattedMessage; 901} 902 903 904std::istream& operator>>( std::istream &sin, Value &root ) 905{ 906 Json::Reader reader; 907 bool ok = reader.parse(sin, root, true); 908 if (!ok) { 909 fprintf( 910 stderr, 911 "Error from reader: %s", 912 reader.getFormattedErrorMessages().c_str()); 913 914 JSON_FAIL_MESSAGE("reader error"); 915 } 916 return sin; 917} 918 919 920} // namespace Json 921