1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/json/json_parser.h"
6
7#include "base/float_util.h"
8#include "base/logging.h"
9#include "base/memory/scoped_ptr.h"
10#include "base/strings/string_number_conversions.h"
11#include "base/strings/string_piece.h"
12#include "base/strings/string_util.h"
13#include "base/strings/stringprintf.h"
14#include "base/strings/utf_string_conversion_utils.h"
15#include "base/strings/utf_string_conversions.h"
16#include "base/third_party/icu/icu_utf.h"
17#include "base/values.h"
18
19namespace base {
20namespace internal {
21
22namespace {
23
// Nesting limit for dictionaries and lists. StackMarker (below) compares the
// parser's current depth against this value.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. Code points below this value are
// appended to a string as a single byte; code points at or above it are
// encoded as a multi-byte UTF-8 sequence.
const int32 kExtendedASCIIStart = 0x80;
27
// This and the class below are used to own the JSON input string for when
// string tokens are stored as StringPiece instead of std::string. This
// optimization avoids about 2/3rds of string memory copies. The constructor
// takes ownership of the input string. The real root value is Swap()ed into
// the new instance.
//
// Because children may be JSONStringValues that point into |json_|, every
// operation that hands a child to the outside world (Swap, Remove with an
// out-parameter) deep-copies first so that the escaping values own their
// string data.
class DictionaryHiddenRootValue : public base::DictionaryValue {
 public:
  // Takes ownership of |json| and steals the contents of |root|, which must be
  // a dictionary. |root| itself is left holding this object's previous (empty)
  // contents and remains owned by the caller.
  DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
    DCHECK(root->IsType(Value::TYPE_DICTIONARY));
    DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
  }

  // Exchanges contents with |other|. The contents that leave this object are a
  // deep copy, so they no longer reference |json_|.
  virtual void Swap(DictionaryValue* other) OVERRIDE {
    DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";

    // First deep copy to convert JSONStringValue to std::string and swap that
    // copy with |other|, which contains the new contents of |this|.
    scoped_ptr<base::DictionaryValue> copy(DeepCopy());
    copy->Swap(other);

    // Then erase the contents of the current dictionary and swap in the
    // new contents, originally from |other|.
    // The old children are destroyed by Clear() before |json_| is released, so
    // nothing is left pointing into the input string when it is freed.
    Clear();
    json_.reset();
    DictionaryValue::Swap(copy.get());
  }

  // Not overriding DictionaryValue::Remove because it just calls through to
  // the method below.

  // Removes |key|. When |out| is non-NULL it receives a deep copy of the
  // removed value rather than the value itself. Returns false if the base
  // class reports that nothing was removed.
  virtual bool RemoveWithoutPathExpansion(const std::string& key,
                                          scoped_ptr<Value>* out) OVERRIDE {
    // If the caller won't take ownership of the removed value, just call up.
    if (!out)
      return DictionaryValue::RemoveWithoutPathExpansion(key, out);

    DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";

    // Otherwise, remove the value while it's still "owned" by this and copy it
    // to convert any JSONStringValues to std::string.
    scoped_ptr<Value> out_owned;
    if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
      return false;

    out->reset(out_owned->DeepCopy());

    return true;
  }

 private:
  // The JSON input that child JSONStringValues may point into.
  scoped_ptr<std::string> json_;

  DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
};
82
// List counterpart of the hidden root: a ListValue that also owns the JSON
// input string so that child JSONStringValues pointing into it stay valid.
// Values that leave the list through Swap() or Remove() are deep-copied first
// so they no longer reference |json_|.
class ListHiddenRootValue : public base::ListValue {
 public:
  // Takes ownership of |json| and steals the contents of |root|, which must be
  // a list. |root| itself remains owned by the caller.
  ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
    DCHECK(root->IsType(Value::TYPE_LIST));
    ListValue::Swap(static_cast<ListValue*>(root));
  }

  // Exchanges contents with |other|; the contents handed to |other| are a deep
  // copy of this list.
  virtual void Swap(ListValue* other) OVERRIDE {
    DVLOG(1) << "Swap()ing a ListValue inefficiently.";

    // First deep copy to convert JSONStringValue to std::string and swap that
    // copy with |other|, which contains the new contents of |this|.
    scoped_ptr<base::ListValue> copy(DeepCopy());
    copy->Swap(other);

    // Then erase the contents of the current list and swap in the new contents,
    // originally from |other|.
    // The old children are destroyed by Clear() before |json_| is released.
    Clear();
    json_.reset();
    ListValue::Swap(copy.get());
  }

  // Removes the element at |index|. When |out| is non-NULL it receives a deep
  // copy of the removed value. Returns false if the base class reports that
  // nothing was removed.
  virtual bool Remove(size_t index, scoped_ptr<Value>* out) OVERRIDE {
    // If the caller won't take ownership of the removed value, just call up.
    if (!out)
      return ListValue::Remove(index, out);

    DVLOG(1) << "Remove()ing from a ListValue inefficiently.";

    // Otherwise, remove the value while it's still "owned" by this and copy it
    // to convert any JSONStringValues to std::string.
    scoped_ptr<Value> out_owned;
    if (!ListValue::Remove(index, &out_owned))
      return false;

    out->reset(out_owned->DeepCopy());

    return true;
  }

 private:
  // The JSON input that child JSONStringValues may point into.
  scoped_ptr<std::string> json_;

  DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
};
128
129// A variant on StringValue that uses StringPiece instead of copying the string
130// into the Value. This can only be stored in a child of hidden root (above),
131// otherwise the referenced string will not be guaranteed to outlive it.
132class JSONStringValue : public base::Value {
133 public:
134  explicit JSONStringValue(const base::StringPiece& piece)
135      : Value(TYPE_STRING),
136        string_piece_(piece) {
137  }
138
139  // Overridden from base::Value:
140  virtual bool GetAsString(std::string* out_value) const OVERRIDE {
141    string_piece_.CopyToString(out_value);
142    return true;
143  }
144  virtual bool GetAsString(string16* out_value) const OVERRIDE {
145    *out_value = UTF8ToUTF16(string_piece_);
146    return true;
147  }
148  virtual Value* DeepCopy() const OVERRIDE {
149    return new StringValue(string_piece_.as_string());
150  }
151  virtual bool Equals(const Value* other) const OVERRIDE {
152    std::string other_string;
153    return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
154        StringPiece(other_string) == string_piece_;
155  }
156
157 private:
158  // The location in the original input stream.
159  base::StringPiece string_piece_;
160
161  DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
162};
163
164// Simple class that checks for maximum recursion/"stack overflow."
165class StackMarker {
166 public:
167  explicit StackMarker(int* depth) : depth_(depth) {
168    ++(*depth_);
169    DCHECK_LE(*depth_, kStackMaxDepth);
170  }
171  ~StackMarker() {
172    --(*depth_);
173  }
174
175  bool IsTooDeep() const {
176    return *depth_ >= kStackMaxDepth;
177  }
178
179 private:
180  int* const depth_;
181
182  DISALLOW_COPY_AND_ASSIGN(StackMarker);
183};
184
185}  // namespace
186
// Creates a parser configured with |options|, a bitmask tested elsewhere in
// this file against JSON_DETACHABLE_CHILDREN and JSON_ALLOW_TRAILING_COMMAS.
// All cursor and error state starts zeroed; Parse() re-initializes it for each
// input.
JSONParser::JSONParser(int options)
    : options_(options),
      start_pos_(NULL),
      pos_(NULL),
      end_pos_(NULL),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSONReader::JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
}
200
// Nothing to release here: the body is empty.
JSONParser::~JSONParser() {
}
203
// Parses |input| as a single JSON value followed only by whitespace/comments.
// Returns a newly allocated Value that the caller owns, or NULL on failure.
// Most failures record an error that is retrievable through error_code() and
// GetErrorMessage().
//
// Unless JSON_DETACHABLE_CHILDREN is set, the input is copied and the copy is
// handed to a hidden root (for dictionary/list roots) so that string children
// can reference it without owning their own copies.
Value* JSONParser::Parse(const StringPiece& input) {
  scoped_ptr<std::string> input_copy;
  // If the children of a JSON root can be detached, then hidden roots cannot
  // be used, so do not bother copying the input because StringPiece will not
  // be used anywhere.
  if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    input_copy.reset(new std::string(input.as_string()));
    start_pos_ = input_copy->data();
  } else {
    start_pos_ = input.data();
  }
  // Reset the cursor and line bookkeeping for this input.
  pos_ = start_pos_;
  end_pos_ = start_pos_ + input.length();
  index_ = 0;
  line_number_ = 1;
  index_last_line_ = 0;

  // Clear any error left over from a previous Parse() call.
  error_code_ = JSONReader::JSON_NO_ERROR;
  error_line_ = 0;
  error_column_ = 0;

  // When the input JSON string starts with a UTF-8 Byte-Order-Mark
  // <0xEF 0xBB 0xBF>, advance the start position to avoid the
  // ParseNextToken function mis-treating a Unicode BOM as an invalid
  // character and returning NULL.
  if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
      static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
      static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
    NextNChars(3);
  }

  // Parse the first and any nested tokens.
  scoped_ptr<Value> root(ParseNextToken());
  if (!root.get())
    return NULL;

  // Make sure the input stream is at an end.
  // The first GetNextToken() can still see the root token's final character,
  // so step over one character and look again before declaring trailing data.
  if (GetNextToken() != T_END_OF_INPUT) {
    if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
      ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
      return NULL;
    }
  }

  // Dictionaries and lists can contain JSONStringValues, so wrap them in a
  // hidden root.
  // The hidden root swaps the contents out of |root|; the emptied |root| is
  // then destroyed by its scoped_ptr when this function returns.
  if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    if (root->IsType(Value::TYPE_DICTIONARY)) {
      return new DictionaryHiddenRootValue(input_copy.release(), root.get());
    } else if (root->IsType(Value::TYPE_LIST)) {
      return new ListHiddenRootValue(input_copy.release(), root.get());
    } else if (root->IsType(Value::TYPE_STRING)) {
      // A string type could be a JSONStringValue, but because there's no
      // corresponding HiddenRootValue, the memory will be lost. Deep copy to
      // preserve it.
      return root->DeepCopy();
    }
  }

  // All other values can be returned directly.
  return root.release();
}
266
// Returns the most recently recorded error code. Parse() resets it to
// JSON_NO_ERROR when it starts.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
270
271std::string JSONParser::GetErrorMessage() const {
272  return FormatErrorMessage(error_line_, error_column_,
273      JSONReader::ErrorCodeToString(error_code_));
274}
275
276// StringBuilder ///////////////////////////////////////////////////////////////
277
// Creates an empty builder that references no input and owns no string.
JSONParser::StringBuilder::StringBuilder()
    : pos_(NULL),
      length_(0),
      string_(NULL) {
}

// Creates a builder whose content starts at |pos| in the input. No characters
// are included yet (|length_| is 0) and nothing has been copied (|string_| is
// NULL).
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos),
      length_(0),
      string_(NULL) {
}
289
290void JSONParser::StringBuilder::Swap(StringBuilder* other) {
291  std::swap(other->string_, string_);
292  std::swap(other->pos_, pos_);
293  std::swap(other->length_, length_);
294}
295
// Frees the converted string, if one was allocated by Convert(). Deleting a
// NULL |string_| is a no-op.
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
299
300void JSONParser::StringBuilder::Append(const char& c) {
301  DCHECK_GE(c, 0);
302  DCHECK_LT(c, 128);
303
304  if (string_)
305    string_->push_back(c);
306  else
307    ++length_;
308}
309
// Appends |str| to the owned string. The builder must already have been
// converted (see Convert()); this is only DCHECKed, so calling it earlier in a
// release build dereferences NULL.
void JSONParser::StringBuilder::AppendString(const std::string& str) {
  DCHECK(string_);
  string_->append(str);
}
314
315void JSONParser::StringBuilder::Convert() {
316  if (string_)
317    return;
318  string_  = new std::string(pos_, length_);
319}
320
321bool JSONParser::StringBuilder::CanBeStringPiece() const {
322  return !string_;
323}
324
325StringPiece JSONParser::StringBuilder::AsStringPiece() {
326  if (string_)
327    return StringPiece();
328  return StringPiece(pos_, length_);
329}
330
331const std::string& JSONParser::StringBuilder::AsString() {
332  if (!string_)
333    Convert();
334  return *string_;
335}
336
337// JSONParser private //////////////////////////////////////////////////////////
338
339inline bool JSONParser::CanConsume(int length) {
340  return pos_ + length <= end_pos_;
341}
342
343const char* JSONParser::NextChar() {
344  DCHECK(CanConsume(1));
345  ++index_;
346  ++pos_;
347  return pos_;
348}
349
350void JSONParser::NextNChars(int n) {
351  DCHECK(CanConsume(n));
352  index_ += n;
353  pos_ += n;
354}
355
356JSONParser::Token JSONParser::GetNextToken() {
357  EatWhitespaceAndComments();
358  if (!CanConsume(1))
359    return T_END_OF_INPUT;
360
361  switch (*pos_) {
362    case '{':
363      return T_OBJECT_BEGIN;
364    case '}':
365      return T_OBJECT_END;
366    case '[':
367      return T_ARRAY_BEGIN;
368    case ']':
369      return T_ARRAY_END;
370    case '"':
371      return T_STRING;
372    case '0':
373    case '1':
374    case '2':
375    case '3':
376    case '4':
377    case '5':
378    case '6':
379    case '7':
380    case '8':
381    case '9':
382    case '-':
383      return T_NUMBER;
384    case 't':
385      return T_BOOL_TRUE;
386    case 'f':
387      return T_BOOL_FALSE;
388    case 'n':
389      return T_NULL;
390    case ',':
391      return T_LIST_SEPARATOR;
392    case ':':
393      return T_OBJECT_PAIR_SEPARATOR;
394    default:
395      return T_INVALID_TOKEN;
396  }
397}
398
// Advances the cursor past any run of whitespace and comments, updating the
// line bookkeeping used for error reporting. Stops at the first character that
// is neither, or at the end of the input.
void JSONParser::EatWhitespaceAndComments() {
  while (pos_ < end_pos_) {
    switch (*pos_) {
      case '\r':
      case '\n':
        // Remember where this line break is so that ReportError() can compute
        // a column relative to it.
        index_last_line_ = index_;
        // Don't increment line_number_ twice for "\r\n".
        if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
          ++line_number_;
        // Fall through.
      case ' ':
      case '\t':
        NextChar();
        break;
      case '/':
        // A '/' that does not start a complete comment ends the scan; the
        // caller then classifies whatever the cursor is left on.
        if (!EatComment())
          return;
        break;
      default:
        return;
    }
  }
}
422
423bool JSONParser::EatComment() {
424  if (*pos_ != '/' || !CanConsume(1))
425    return false;
426
427  char next_char = *NextChar();
428  if (next_char == '/') {
429    // Single line comment, read to newline.
430    while (CanConsume(1)) {
431      char next_char = *NextChar();
432      if (next_char == '\n' || next_char == '\r')
433        return true;
434    }
435  } else if (next_char == '*') {
436    char previous_char = '\0';
437    // Block comment, read until end marker.
438    while (CanConsume(1)) {
439      next_char = *NextChar();
440      if (previous_char == '*' && next_char == '/') {
441        // EatWhitespaceAndComments will inspect pos_, which will still be on
442        // the last / of the comment, so advance once more (which may also be
443        // end of input).
444        NextChar();
445        return true;
446      }
447      previous_char = next_char;
448    }
449
450    // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
451  }
452
453  return false;
454}
455
456Value* JSONParser::ParseNextToken() {
457  return ParseToken(GetNextToken());
458}
459
460Value* JSONParser::ParseToken(Token token) {
461  switch (token) {
462    case T_OBJECT_BEGIN:
463      return ConsumeDictionary();
464    case T_ARRAY_BEGIN:
465      return ConsumeList();
466    case T_STRING:
467      return ConsumeString();
468    case T_NUMBER:
469      return ConsumeNumber();
470    case T_BOOL_TRUE:
471    case T_BOOL_FALSE:
472    case T_NULL:
473      return ConsumeLiteral();
474    default:
475      ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
476      return NULL;
477  }
478}
479
// Parses a JSON object whose opening '{' is at the cursor. On success returns
// a new DictionaryValue owned by the caller and leaves the cursor on the
// closing '}'. On failure records an error (here or in a nested call) and
// returns NULL.
Value* JSONParser::ConsumeDictionary() {
  if (*pos_ != '{') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return NULL;
  }

  // Counts this nesting level for the lifetime of the call.
  StackMarker depth_check(&stack_depth_);
  if (depth_check.IsTooDeep()) {
    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    return NULL;
  }

  scoped_ptr<DictionaryValue> dict(new DictionaryValue);

  // Step past the '{'.
  NextChar();
  Token token = GetNextToken();
  while (token != T_OBJECT_END) {
    if (token != T_STRING) {
      ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
      return NULL;
    }

    // First consume the key.
    StringBuilder key;
    if (!ConsumeStringRaw(&key)) {
      return NULL;
    }

    // Read the separator.
    // The cursor is on the key's closing quote, so step past it first.
    NextChar();
    token = GetNextToken();
    if (token != T_OBJECT_PAIR_SEPARATOR) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }

    // The next token is the value. Ownership transfers to |dict|.
    NextChar();
    Value* value = ParseNextToken();
    if (!value) {
      // ReportError from deeper level.
      return NULL;
    }

    dict->SetWithoutPathExpansion(key.AsString(), value);

    // Step past the last character of the value, then expect either a ','
    // followed by another pair, or the closing '}'.
    NextChar();
    token = GetNextToken();
    if (token == T_LIST_SEPARATOR) {
      NextChar();
      token = GetNextToken();
      if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
        ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
        return NULL;
      }
    } else if (token != T_OBJECT_END) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
      return NULL;
    }
  }

  return dict.release();
}
543
// Parses a JSON array whose opening '[' is at the cursor. On success returns a
// new ListValue owned by the caller and leaves the cursor on the closing ']'.
// On failure records an error (here or in a nested call) and returns NULL.
Value* JSONParser::ConsumeList() {
  if (*pos_ != '[') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return NULL;
  }

  // Counts this nesting level for the lifetime of the call.
  StackMarker depth_check(&stack_depth_);
  if (depth_check.IsTooDeep()) {
    ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    return NULL;
  }

  scoped_ptr<ListValue> list(new ListValue);

  // Step past the '['.
  NextChar();
  Token token = GetNextToken();
  while (token != T_ARRAY_END) {
    Value* item = ParseToken(token);
    if (!item) {
      // ReportError from deeper level.
      return NULL;
    }

    // |list| takes ownership of |item|.
    list->Append(item);

    // Step past the last character of the item, then expect either a ','
    // followed by another item, or the closing ']'.
    NextChar();
    token = GetNextToken();
    if (token == T_LIST_SEPARATOR) {
      NextChar();
      token = GetNextToken();
      if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
        ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
        return NULL;
      }
    } else if (token != T_ARRAY_END) {
      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
      return NULL;
    }
  }

  return list.release();
}
586
587Value* JSONParser::ConsumeString() {
588  StringBuilder string;
589  if (!ConsumeStringRaw(&string))
590    return NULL;
591
592  // Create the Value representation, using a hidden root, if configured
593  // to do so, and if the string can be represented by StringPiece.
594  if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
595    return new JSONStringValue(string.AsStringPiece());
596  } else {
597    if (string.CanBeStringPiece())
598      string.Convert();
599    return new StringValue(string.AsString());
600  }
601}
602
603bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
604  if (*pos_ != '"') {
605    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
606    return false;
607  }
608
609  // StringBuilder will internally build a StringPiece unless a UTF-16
610  // conversion occurs, at which point it will perform a copy into a
611  // std::string.
612  StringBuilder string(NextChar());
613
614  int length = end_pos_ - start_pos_;
615  int32 next_char = 0;
616
617  while (CanConsume(1)) {
618    pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
619    CBU8_NEXT(start_pos_, index_, length, next_char);
620    if (next_char < 0 || !IsValidCharacter(next_char)) {
621      ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
622      return false;
623    }
624
625    // If this character is an escape sequence...
626    if (next_char == '\\') {
627      // The input string will be adjusted (either by combining the two
628      // characters of an encoded escape sequence, or with a UTF conversion),
629      // so using StringPiece isn't possible -- force a conversion.
630      string.Convert();
631
632      if (!CanConsume(1)) {
633        ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
634        return false;
635      }
636
637      switch (*NextChar()) {
638        // Allowed esape sequences:
639        case 'x': {  // UTF-8 sequence.
640          // UTF-8 \x escape sequences are not allowed in the spec, but they
641          // are supported here for backwards-compatiblity with the old parser.
642          if (!CanConsume(2)) {
643            ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
644            return false;
645          }
646
647          int hex_digit = 0;
648          if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
649            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
650            return false;
651          }
652          NextChar();
653
654          if (hex_digit < kExtendedASCIIStart)
655            string.Append(hex_digit);
656          else
657            DecodeUTF8(hex_digit, &string);
658          break;
659        }
660        case 'u': {  // UTF-16 sequence.
661          // UTF units are of the form \uXXXX.
662          if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
663            ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
664            return false;
665          }
666
667          // Skip the 'u'.
668          NextChar();
669
670          std::string utf8_units;
671          if (!DecodeUTF16(&utf8_units)) {
672            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
673            return false;
674          }
675
676          string.AppendString(utf8_units);
677          break;
678        }
679        case '"':
680          string.Append('"');
681          break;
682        case '\\':
683          string.Append('\\');
684          break;
685        case '/':
686          string.Append('/');
687          break;
688        case 'b':
689          string.Append('\b');
690          break;
691        case 'f':
692          string.Append('\f');
693          break;
694        case 'n':
695          string.Append('\n');
696          break;
697        case 'r':
698          string.Append('\r');
699          break;
700        case 't':
701          string.Append('\t');
702          break;
703        case 'v':  // Not listed as valid escape sequence in the RFC.
704          string.Append('\v');
705          break;
706        // All other escape squences are illegal.
707        default:
708          ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
709          return false;
710      }
711    } else if (next_char == '"') {
712      --index_;  // Rewind by one because of CBU8_NEXT.
713      out->Swap(&string);
714      return true;
715    } else {
716      if (next_char < kExtendedASCIIStart)
717        string.Append(next_char);
718      else
719        DecodeUTF8(next_char, &string);
720    }
721  }
722
723  ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
724  return false;
725}
726
727// Entry is at the first X in \uXXXX.
728bool JSONParser::DecodeUTF16(std::string* dest_string) {
729  if (!CanConsume(4))
730    return false;
731
732  // This is a 32-bit field because the shift operations in the
733  // conversion process below cause MSVC to error about "data loss."
734  // This only stores UTF-16 code units, though.
735  // Consume the UTF-16 code unit, which may be a high surrogate.
736  int code_unit16_high = 0;
737  if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
738    return false;
739
740  // Only add 3, not 4, because at the end of this iteration, the parser has
741  // finished working with the last digit of the UTF sequence, meaning that
742  // the next iteration will advance to the next byte.
743  NextNChars(3);
744
745  // Used to convert the UTF-16 code units to a code point and then to a UTF-8
746  // code unit sequence.
747  char code_unit8[8] = { 0 };
748  size_t offset = 0;
749
750  // If this is a high surrogate, consume the next code unit to get the
751  // low surrogate.
752  if (CBU16_IS_SURROGATE(code_unit16_high)) {
753    // Make sure this is the high surrogate. If not, it's an encoding
754    // error.
755    if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
756      return false;
757
758    // Make sure that the token has more characters to consume the
759    // lower surrogate.
760    if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
761      return false;
762    if (*NextChar() != '\\' || *NextChar() != 'u')
763      return false;
764
765    NextChar();  // Read past 'u'.
766    int code_unit16_low = 0;
767    if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
768      return false;
769
770    NextNChars(3);
771
772    if (!CBU16_IS_TRAIL(code_unit16_low)) {
773      return false;
774    }
775
776    uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
777                                                code_unit16_low);
778    offset = 0;
779    CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
780  } else {
781    // Not a surrogate.
782    DCHECK(CBU16_IS_SINGLE(code_unit16_high));
783    CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
784  }
785
786  dest_string->append(code_unit8);
787  return true;
788}
789
790void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
791  // Anything outside of the basic ASCII plane will need to be decoded from
792  // int32 to a multi-byte sequence.
793  if (point < kExtendedASCIIStart) {
794    dest->Append(point);
795  } else {
796    char utf8_units[4] = { 0 };
797    int offset = 0;
798    CBU8_APPEND_UNSAFE(utf8_units, offset, point);
799    dest->Convert();
800    // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
801    // zero terminated at this point.  |offset| contains the correct length.
802    dest->AppendString(std::string(utf8_units, offset));
803  }
804}
805
806Value* JSONParser::ConsumeNumber() {
807  const char* num_start = pos_;
808  const int start_index = index_;
809  int end_index = start_index;
810
811  if (*pos_ == '-')
812    NextChar();
813
814  if (!ReadInt(false)) {
815    ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
816    return NULL;
817  }
818  end_index = index_;
819
820  // The optional fraction part.
821  if (*pos_ == '.') {
822    if (!CanConsume(1)) {
823      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
824      return NULL;
825    }
826    NextChar();
827    if (!ReadInt(true)) {
828      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
829      return NULL;
830    }
831    end_index = index_;
832  }
833
834  // Optional exponent part.
835  if (*pos_ == 'e' || *pos_ == 'E') {
836    NextChar();
837    if (*pos_ == '-' || *pos_ == '+')
838      NextChar();
839    if (!ReadInt(true)) {
840      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
841      return NULL;
842    }
843    end_index = index_;
844  }
845
846  // ReadInt is greedy because numbers have no easily detectable sentinel,
847  // so save off where the parser should be on exit (see Consume invariant at
848  // the top of the header), then make sure the next token is one which is
849  // valid.
850  const char* exit_pos = pos_ - 1;
851  int exit_index = index_ - 1;
852
853  switch (GetNextToken()) {
854    case T_OBJECT_END:
855    case T_ARRAY_END:
856    case T_LIST_SEPARATOR:
857    case T_END_OF_INPUT:
858      break;
859    default:
860      ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
861      return NULL;
862  }
863
864  pos_ = exit_pos;
865  index_ = exit_index;
866
867  StringPiece num_string(num_start, end_index - start_index);
868
869  int num_int;
870  if (StringToInt(num_string, &num_int))
871    return new FundamentalValue(num_int);
872
873  double num_double;
874  if (base::StringToDouble(num_string.as_string(), &num_double) &&
875      IsFinite(num_double)) {
876    return new FundamentalValue(num_double);
877  }
878
879  return NULL;
880}
881
882bool JSONParser::ReadInt(bool allow_leading_zeros) {
883  char first = *pos_;
884  int len = 0;
885
886  char c = first;
887  while (CanConsume(1) && IsAsciiDigit(c)) {
888    c = *NextChar();
889    ++len;
890  }
891
892  if (len == 0)
893    return false;
894
895  if (!allow_leading_zeros && len > 1 && first == '0')
896    return false;
897
898  return true;
899}
900
901Value* JSONParser::ConsumeLiteral() {
902  switch (*pos_) {
903    case 't': {
904      const char* kTrueLiteral = "true";
905      const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
906      if (!CanConsume(kTrueLen - 1) ||
907          !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
908        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
909        return NULL;
910      }
911      NextNChars(kTrueLen - 1);
912      return new FundamentalValue(true);
913    }
914    case 'f': {
915      const char* kFalseLiteral = "false";
916      const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
917      if (!CanConsume(kFalseLen - 1) ||
918          !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
919        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
920        return NULL;
921      }
922      NextNChars(kFalseLen - 1);
923      return new FundamentalValue(false);
924    }
925    case 'n': {
926      const char* kNullLiteral = "null";
927      const int kNullLen = static_cast<int>(strlen(kNullLiteral));
928      if (!CanConsume(kNullLen - 1) ||
929          !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
930        ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
931        return NULL;
932      }
933      NextNChars(kNullLen - 1);
934      return Value::CreateNullValue();
935    }
936    default:
937      ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
938      return NULL;
939  }
940}
941
942// static
943bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
944  return strncmp(one, two, len) == 0;
945}
946
947void JSONParser::ReportError(JSONReader::JsonParseError code,
948                             int column_adjust) {
949  error_code_ = code;
950  error_line_ = line_number_;
951  error_column_ = index_ - index_last_line_ + column_adjust;
952}
953
954// static
955std::string JSONParser::FormatErrorMessage(int line, int column,
956                                           const std::string& description) {
957  if (line || column) {
958    return StringPrintf("Line: %i, column: %i, %s",
959        line, column, description.c_str());
960  }
961  return description;
962}
963
964}  // namespace internal
965}  // namespace base
966