1// Copyright 2007-2011 Baptiste Lepilleur
2// Distributed under MIT license, or public domain if desired and
3// recognized in your jurisdiction.
4// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5
6#if !defined(JSON_IS_AMALGAMATION)
7# include <json/assertions.h>
8# include <json/reader.h>
9# include <json/value.h>
10# include "json_tool.h"
11#endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstdio>
#include <cstring>
#include <istream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <utility>
19
20#if _MSC_VER >= 1400 // VC++ 8.0
21#pragma warning( disable : 4996 )   // disable warning about strdup being deprecated.
22#endif
23
24namespace Json {
25
26// Implementation of class Features
27// ////////////////////////////////
28
29Features::Features()
30   : allowComments_( true )
31   , strictRoot_( false )
32{
33}
34
35
36Features
37Features::all()
38{
39   return Features();
40}
41
42
43Features
44Features::strictMode()
45{
46   Features features;
47   features.allowComments_ = false;
48   features.strictRoot_ = true;
49   return features;
50}
51
52// Implementation of class Reader
53// ////////////////////////////////
54
55
56static inline bool
57in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
58{
59   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4;
60}
61
62static inline bool
63in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
64{
65   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4  ||  c == c5;
66}
67
68
69static bool
70containsNewLine( Reader::Location begin,
71                 Reader::Location end )
72{
73   for ( ;begin < end; ++begin )
74      if ( *begin == '\n'  ||  *begin == '\r' )
75         return true;
76   return false;
77}
78
79
80// Class Reader
81// //////////////////////////////////////////////////////////////////
82
83Reader::Reader()
84    : errors_(),
85      document_(),
86      begin_(),
87      end_(),
88      current_(),
89      lastValueEnd_(),
90      lastValue_(),
91      commentsBefore_(),
92      features_( Features::all() ),
93      collectComments_()
94{
95}
96
97
98Reader::Reader( const Features &features )
99    : errors_(),
100      document_(),
101      begin_(),
102      end_(),
103      current_(),
104      lastValueEnd_(),
105      lastValue_(),
106      commentsBefore_(),
107      features_( features ),
108      collectComments_()
109{
110}
111
112
113bool
114Reader::parse( const std::string &document,
115               Value &root,
116               bool collectComments )
117{
118   document_ = document;
119   const char *begin = document_.c_str();
120   const char *end = begin + document_.length();
121   return parse( begin, end, root, collectComments );
122}
123
124
125bool
126Reader::parse( std::istream& sin,
127               Value &root,
128               bool collectComments )
129{
130   //std::istream_iterator<char> begin(sin);
131   //std::istream_iterator<char> end;
132   // Those would allow streamed input from a file, if parse() were a
133   // template function.
134
135   // Since std::string is reference-counted, this at least does not
136   // create an extra copy.
137   std::string doc;
138   std::getline(sin, doc, (char)EOF);
139   return parse( doc, root, collectComments );
140}
141
142bool
143Reader::parse( const char *beginDoc, const char *endDoc,
144               Value &root,
145               bool collectComments )
146{
147   if ( !features_.allowComments_ )
148   {
149      collectComments = false;
150   }
151
152   begin_ = beginDoc;
153   end_ = endDoc;
154   collectComments_ = collectComments;
155   current_ = begin_;
156   lastValueEnd_ = 0;
157   lastValue_ = 0;
158   commentsBefore_ = "";
159   errors_.clear();
160   while ( !nodes_.empty() )
161      nodes_.pop();
162   nodes_.push( &root );
163
164   bool successful = readValue();
165   Token token;
166   skipCommentTokens( token );
167   if ( collectComments_  &&  !commentsBefore_.empty() )
168      root.setComment( commentsBefore_, commentAfter );
169   if ( features_.strictRoot_ )
170   {
171      if ( !root.isArray()  &&  !root.isObject() )
172      {
173         // Set error location to start of doc, ideally should be first token found in doc
174         token.type_ = tokenError;
175         token.start_ = beginDoc;
176         token.end_ = endDoc;
177         addError( "A valid JSON document must be either an array or an object value.",
178                   token );
179         return false;
180      }
181   }
182   return successful;
183}
184
185
186bool
187Reader::readValue()
188{
189   Token token;
190   skipCommentTokens( token );
191   bool successful = true;
192
193   if ( collectComments_  &&  !commentsBefore_.empty() )
194   {
195      currentValue().setComment( commentsBefore_, commentBefore );
196      commentsBefore_ = "";
197   }
198
199
200   switch ( token.type_ )
201   {
202   case tokenObjectBegin:
203      successful = readObject( token );
204      break;
205   case tokenArrayBegin:
206      successful = readArray( token );
207      break;
208   case tokenNumber:
209      successful = decodeNumber( token );
210      break;
211   case tokenString:
212      successful = decodeString( token );
213      break;
214   case tokenTrue:
215      currentValue() = true;
216      break;
217   case tokenFalse:
218      currentValue() = false;
219      break;
220   case tokenNull:
221      currentValue() = Value();
222      break;
223   default:
224      return addError( "Syntax error: value, object or array expected.", token );
225   }
226
227   if ( collectComments_ )
228   {
229      lastValueEnd_ = current_;
230      lastValue_ = &currentValue();
231   }
232
233   return successful;
234}
235
236
237void
238Reader::skipCommentTokens( Token &token )
239{
240   if ( features_.allowComments_ )
241   {
242      do
243      {
244         readToken( token );
245      }
246      while ( token.type_ == tokenComment );
247   }
248   else
249   {
250      readToken( token );
251   }
252}
253
254
255bool
256Reader::expectToken( TokenType type, Token &token, const char *message )
257{
258   readToken( token );
259   if ( token.type_ != type )
260      return addError( message, token );
261   return true;
262}
263
264
265bool
266Reader::readToken( Token &token )
267{
268   skipSpaces();
269   token.start_ = current_;
270   Char c = getNextChar();
271   bool ok = true;
272   switch ( c )
273   {
274   case '{':
275      token.type_ = tokenObjectBegin;
276      break;
277   case '}':
278      token.type_ = tokenObjectEnd;
279      break;
280   case '[':
281      token.type_ = tokenArrayBegin;
282      break;
283   case ']':
284      token.type_ = tokenArrayEnd;
285      break;
286   case '"':
287      token.type_ = tokenString;
288      ok = readString();
289      break;
290   case '/':
291      token.type_ = tokenComment;
292      ok = readComment();
293      break;
294   case '0':
295   case '1':
296   case '2':
297   case '3':
298   case '4':
299   case '5':
300   case '6':
301   case '7':
302   case '8':
303   case '9':
304   case '-':
305      token.type_ = tokenNumber;
306      readNumber();
307      break;
308   case 't':
309      token.type_ = tokenTrue;
310      ok = match( "rue", 3 );
311      break;
312   case 'f':
313      token.type_ = tokenFalse;
314      ok = match( "alse", 4 );
315      break;
316   case 'n':
317      token.type_ = tokenNull;
318      ok = match( "ull", 3 );
319      break;
320   case ',':
321      token.type_ = tokenArraySeparator;
322      break;
323   case ':':
324      token.type_ = tokenMemberSeparator;
325      break;
326   case 0:
327      token.type_ = tokenEndOfStream;
328      break;
329   default:
330      ok = false;
331      break;
332   }
333   if ( !ok )
334      token.type_ = tokenError;
335   token.end_ = current_;
336   return true;
337}
338
339
340void
341Reader::skipSpaces()
342{
343   while ( current_ != end_ )
344   {
345      Char c = *current_;
346      if ( c == ' '  ||  c == '\t'  ||  c == '\r'  ||  c == '\n' )
347         ++current_;
348      else
349         break;
350   }
351}
352
353
354bool
355Reader::match( Location pattern,
356               int patternLength )
357{
358   if ( end_ - current_ < patternLength )
359      return false;
360   int index = patternLength;
361   while ( index-- )
362      if ( current_[index] != pattern[index] )
363         return false;
364   current_ += patternLength;
365   return true;
366}
367
368
369bool
370Reader::readComment()
371{
372   Location commentBegin = current_ - 1;
373   Char c = getNextChar();
374   bool successful = false;
375   if ( c == '*' )
376      successful = readCStyleComment();
377   else if ( c == '/' )
378      successful = readCppStyleComment();
379   if ( !successful )
380      return false;
381
382   if ( collectComments_ )
383   {
384      CommentPlacement placement = commentBefore;
385      if ( lastValueEnd_  &&  !containsNewLine( lastValueEnd_, commentBegin ) )
386      {
387         if ( c != '*'  ||  !containsNewLine( commentBegin, current_ ) )
388            placement = commentAfterOnSameLine;
389      }
390
391      addComment( commentBegin, current_, placement );
392   }
393   return true;
394}
395
396
397void
398Reader::addComment( Location begin,
399                    Location end,
400                    CommentPlacement placement )
401{
402   assert( collectComments_ );
403   if ( placement == commentAfterOnSameLine )
404   {
405      assert( lastValue_ != 0 );
406      lastValue_->setComment( std::string( begin, end ), placement );
407   }
408   else
409   {
410      if ( !commentsBefore_.empty() )
411         commentsBefore_ += "\n";
412      commentsBefore_ += std::string( begin, end );
413   }
414}
415
416
417bool
418Reader::readCStyleComment()
419{
420   while ( current_ != end_ )
421   {
422      Char c = getNextChar();
423      if ( c == '*'  &&  *current_ == '/' )
424         break;
425   }
426   return getNextChar() == '/';
427}
428
429
430bool
431Reader::readCppStyleComment()
432{
433   while ( current_ != end_ )
434   {
435      Char c = getNextChar();
436      if (  c == '\r'  ||  c == '\n' )
437         break;
438   }
439   return true;
440}
441
442
443void
444Reader::readNumber()
445{
446   while ( current_ != end_ )
447   {
448      if ( !(*current_ >= '0'  &&  *current_ <= '9')  &&
449           !in( *current_, '.', 'e', 'E', '+', '-' ) )
450         break;
451      ++current_;
452   }
453}
454
455bool
456Reader::readString()
457{
458   Char c = 0;
459   while ( current_ != end_ )
460   {
461      c = getNextChar();
462      if ( c == '\\' )
463         getNextChar();
464      else if ( c == '"' )
465         break;
466   }
467   return c == '"';
468}
469
470
471bool
472Reader::readObject( Token &/*tokenStart*/ )
473{
474   Token tokenName;
475   std::string name;
476   currentValue() = Value( objectValue );
477   while ( readToken( tokenName ) )
478   {
479      bool initialTokenOk = true;
480      while ( tokenName.type_ == tokenComment  &&  initialTokenOk )
481         initialTokenOk = readToken( tokenName );
482      if  ( !initialTokenOk )
483         break;
484      if ( tokenName.type_ == tokenObjectEnd  &&  name.empty() )  // empty object
485         return true;
486      if ( tokenName.type_ != tokenString )
487         break;
488
489      name = "";
490      if ( !decodeString( tokenName, name ) )
491         return recoverFromError( tokenObjectEnd );
492
493      Token colon;
494      if ( !readToken( colon ) ||  colon.type_ != tokenMemberSeparator )
495      {
496         return addErrorAndRecover( "Missing ':' after object member name",
497                                    colon,
498                                    tokenObjectEnd );
499      }
500      Value &value = currentValue()[ name ];
501      nodes_.push( &value );
502      bool ok = readValue();
503      nodes_.pop();
504      if ( !ok ) // error already set
505         return recoverFromError( tokenObjectEnd );
506
507      Token comma;
508      if ( !readToken( comma )
509            ||  ( comma.type_ != tokenObjectEnd  &&
510                  comma.type_ != tokenArraySeparator &&
511                  comma.type_ != tokenComment ) )
512      {
513         return addErrorAndRecover( "Missing ',' or '}' in object declaration",
514                                    comma,
515                                    tokenObjectEnd );
516      }
517      bool finalizeTokenOk = true;
518      while ( comma.type_ == tokenComment &&
519              finalizeTokenOk )
520         finalizeTokenOk = readToken( comma );
521      if ( comma.type_ == tokenObjectEnd )
522         return true;
523   }
524   return addErrorAndRecover( "Missing '}' or object member name",
525                              tokenName,
526                              tokenObjectEnd );
527}
528
529
530bool
531Reader::readArray( Token &/*tokenStart*/ )
532{
533   currentValue() = Value( arrayValue );
534   skipSpaces();
535   if ( *current_ == ']' ) // empty array
536   {
537      Token endArray;
538      readToken( endArray );
539      return true;
540   }
541   int index = 0;
542   for (;;)
543   {
544      Value &value = currentValue()[ index++ ];
545      nodes_.push( &value );
546      bool ok = readValue();
547      nodes_.pop();
548      if ( !ok ) // error already set
549         return recoverFromError( tokenArrayEnd );
550
551      Token token;
552      // Accept Comment after last item in the array.
553      ok = readToken( token );
554      while ( token.type_ == tokenComment  &&  ok )
555      {
556         ok = readToken( token );
557      }
558      bool badTokenType = ( token.type_ != tokenArraySeparator  &&
559                            token.type_ != tokenArrayEnd );
560      if ( !ok  ||  badTokenType )
561      {
562         return addErrorAndRecover( "Missing ',' or ']' in array declaration",
563                                    token,
564                                    tokenArrayEnd );
565      }
566      if ( token.type_ == tokenArrayEnd )
567         break;
568   }
569   return true;
570}
571
572
573bool
574Reader::decodeNumber( Token &token )
575{
576   bool isDouble = false;
577   for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
578   {
579      isDouble = isDouble
580                 ||  in( *inspect, '.', 'e', 'E', '+' )
581                 ||  ( *inspect == '-'  &&  inspect != token.start_ );
582   }
583   if ( isDouble )
584      return decodeDouble( token );
585   // Attempts to parse the number as an integer. If the number is
586   // larger than the maximum supported value of an integer then
587   // we decode the number as a double.
588   Location current = token.start_;
589   bool isNegative = *current == '-';
590   if ( isNegative )
591      ++current;
592   Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
593                                                   : Value::maxLargestUInt;
594   Value::LargestUInt threshold = maxIntegerValue / 10;
595   Value::LargestUInt value = 0;
596   while ( current < token.end_ )
597   {
598      Char c = *current++;
599      if ( c < '0'  ||  c > '9' )
600         return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
601      Value::UInt digit(c - '0');
602      if ( value >= threshold )
603      {
604         // We've hit or exceeded the max value divided by 10 (rounded down). If
605         // a) we've only just touched the limit, b) this is the last digit, and
606         // c) it's small enough to fit in that rounding delta, we're okay.
607         // Otherwise treat this number as a double to avoid overflow.
608         if (value > threshold ||
609             current != token.end_ ||
610             digit > maxIntegerValue % 10)
611         {
612            return decodeDouble( token );
613         }
614      }
615      value = value * 10 + digit;
616   }
617   if ( isNegative )
618      currentValue() = -Value::LargestInt( value );
619   else if ( value <= Value::LargestUInt(Value::maxInt) )
620      currentValue() = Value::LargestInt( value );
621   else
622      currentValue() = value;
623   return true;
624}
625
626
627bool
628Reader::decodeDouble( Token &token )
629{
630   double value = 0;
631   const int bufferSize = 32;
632   int count;
633   int length = int(token.end_ - token.start_);
634
635   // Sanity check to avoid buffer overflow exploits.
636   if (length < 0) {
637      return addError( "Unable to parse token length", token );
638   }
639
640   // Avoid using a string constant for the format control string given to
641   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
642   // info:
643   //
644   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
645   char format[] = "%lf";
646
647   if ( length <= bufferSize )
648   {
649      Char buffer[bufferSize+1];
650      memcpy( buffer, token.start_, length );
651      buffer[length] = 0;
652      count = sscanf( buffer, format, &value );
653   }
654   else
655   {
656      std::string buffer( token.start_, token.end_ );
657      count = sscanf( buffer.c_str(), format, &value );
658   }
659
660   if ( count != 1 )
661      return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
662   currentValue() = value;
663   return true;
664}
665
666
667bool
668Reader::decodeString( Token &token )
669{
670   std::string decoded;
671   if ( !decodeString( token, decoded ) )
672      return false;
673   currentValue() = decoded;
674   return true;
675}
676
677
678bool
679Reader::decodeString( Token &token, std::string &decoded )
680{
681   decoded.reserve( token.end_ - token.start_ - 2 );
682   Location current = token.start_ + 1; // skip '"'
683   Location end = token.end_ - 1;      // do not include '"'
684   while ( current != end )
685   {
686      Char c = *current++;
687      if ( c == '"' )
688         break;
689      else if ( c == '\\' )
690      {
691         if ( current == end )
692            return addError( "Empty escape sequence in string", token, current );
693         Char escape = *current++;
694         switch ( escape )
695         {
696         case '"': decoded += '"'; break;
697         case '/': decoded += '/'; break;
698         case '\\': decoded += '\\'; break;
699         case 'b': decoded += '\b'; break;
700         case 'f': decoded += '\f'; break;
701         case 'n': decoded += '\n'; break;
702         case 'r': decoded += '\r'; break;
703         case 't': decoded += '\t'; break;
704         case 'u':
705            {
706               unsigned int unicode;
707               if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
708                  return false;
709               decoded += codePointToUTF8(unicode);
710            }
711            break;
712         default:
713            return addError( "Bad escape sequence in string", token, current );
714         }
715      }
716      else
717      {
718         decoded += c;
719      }
720   }
721   return true;
722}
723
724bool
725Reader::decodeUnicodeCodePoint( Token &token,
726                                     Location &current,
727                                     Location end,
728                                     unsigned int &unicode )
729{
730
731   if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
732      return false;
733   if (unicode >= 0xD800 && unicode <= 0xDBFF)
734   {
735      // surrogate pairs
736      if (end - current < 6)
737         return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
738      unsigned int surrogatePair;
739      if (*(current++) == '\\' && *(current++)== 'u')
740      {
741         if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
742         {
743            unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
744         }
745         else
746            return false;
747      }
748      else
749         return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
750   }
751   return true;
752}
753
754bool
755Reader::decodeUnicodeEscapeSequence( Token &token,
756                                     Location &current,
757                                     Location end,
758                                     unsigned int &unicode )
759{
760   if ( end - current < 4 )
761      return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
762   unicode = 0;
763   for ( int index =0; index < 4; ++index )
764   {
765      Char c = *current++;
766      unicode *= 16;
767      if ( c >= '0'  &&  c <= '9' )
768         unicode += c - '0';
769      else if ( c >= 'a'  &&  c <= 'f' )
770         unicode += c - 'a' + 10;
771      else if ( c >= 'A'  &&  c <= 'F' )
772         unicode += c - 'A' + 10;
773      else
774         return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
775   }
776   return true;
777}
778
779
780bool
781Reader::addError( const std::string &message,
782                  Token &token,
783                  Location extra )
784{
785   ErrorInfo info;
786   info.token_ = token;
787   info.message_ = message;
788   info.extra_ = extra;
789   errors_.push_back( info );
790   return false;
791}
792
793
794bool
795Reader::recoverFromError( TokenType skipUntilToken )
796{
797   int errorCount = int(errors_.size());
798   Token skip;
799   for (;;)
800   {
801      if ( !readToken(skip) )
802         errors_.resize( errorCount ); // discard errors caused by recovery
803      if ( skip.type_ == skipUntilToken  ||  skip.type_ == tokenEndOfStream )
804         break;
805   }
806   errors_.resize( errorCount );
807   return false;
808}
809
810
811bool
812Reader::addErrorAndRecover( const std::string &message,
813                            Token &token,
814                            TokenType skipUntilToken )
815{
816   addError( message, token );
817   return recoverFromError( skipUntilToken );
818}
819
820
821Value &
822Reader::currentValue()
823{
824   return *(nodes_.top());
825}
826
827
828Reader::Char
829Reader::getNextChar()
830{
831   if ( current_ == end_ )
832      return 0;
833   return *current_++;
834}
835
836
837void
838Reader::getLocationLineAndColumn( Location location,
839                                  int &line,
840                                  int &column ) const
841{
842   Location current = begin_;
843   Location lastLineStart = current;
844   line = 0;
845   while ( current < location  &&  current != end_ )
846   {
847      Char c = *current++;
848      if ( c == '\r' )
849      {
850         if ( *current == '\n' )
851            ++current;
852         lastLineStart = current;
853         ++line;
854      }
855      else if ( c == '\n' )
856      {
857         lastLineStart = current;
858         ++line;
859      }
860   }
861   // column & line start at 1
862   column = int(location - lastLineStart) + 1;
863   ++line;
864}
865
866
867std::string
868Reader::getLocationLineAndColumn( Location location ) const
869{
870   int line, column;
871   getLocationLineAndColumn( location, line, column );
872   char buffer[18+16+16+1];
873   sprintf( buffer, "Line %d, Column %d", line, column );
874   return buffer;
875}
876
877
878// Deprecated. Preserved for backward compatibility
879std::string
880Reader::getFormatedErrorMessages() const
881{
882    return getFormattedErrorMessages();
883}
884
885
886std::string
887Reader::getFormattedErrorMessages() const
888{
889   std::string formattedMessage;
890   for ( Errors::const_iterator itError = errors_.begin();
891         itError != errors_.end();
892         ++itError )
893   {
894      const ErrorInfo &error = *itError;
895      formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
896      formattedMessage += "  " + error.message_ + "\n";
897      if ( error.extra_ )
898         formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
899   }
900   return formattedMessage;
901}
902
903
904std::istream& operator>>( std::istream &sin, Value &root )
905{
906    Json::Reader reader;
907    bool ok = reader.parse(sin, root, true);
908    if (!ok) {
909      fprintf(
910          stderr,
911          "Error from reader: %s",
912          reader.getFormattedErrorMessages().c_str());
913
914      JSON_FAIL_MESSAGE("reader error");
915    }
916    return sin;
917}
918
919
920} // namespace Json
921