json_reader.h revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// A JSON parser.  Converts strings of JSON into a Value object (see
6// base/values.h).
7// http://www.ietf.org/rfc/rfc4627.txt?number=4627
8//
9// Known limitations/deviations from the RFC:
10// - Only knows how to parse ints within the range of a signed 32 bit int and
11//   decimal numbers within a double.
12// - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
13//   (BE or LE) and UTF-32 (BE or LE) as well.
14// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
15//   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To prevent the function from mistaking a UTF-8 BOM for an invalid
//   character, the function skips a Unicode BOM at the beginning of the
//   Unicode string (converted from the input UTF-8 string) before parsing it.
23//
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
26// TODO(tc): Add an option to disable comment stripping
27// TODO(aa): Consider making the constructor public and the static Read() method
28// only a convenience for the common uses with more complex configuration going
29// on the instance.
30
31#ifndef BASE_JSON_JSON_READER_H_
32#define BASE_JSON_JSON_READER_H_
33#pragma once
34
35#include <string>
36
37#include "base/basictypes.h"
38
// Chromium and Chromium OS check out gtest to different places, so we're
// unable to compile on both if we include gtest_prod.h here.  Instead,
// replicate its only contents -- this will need to be updated if the macro
// ever changes.
//
// The #ifndef guard keeps whatever definition of FRIEND_TEST is already in
// scope (for example, when gtest_prod.h was included earlier in the same
// translation unit) rather than redefining the macro.
#ifndef FRIEND_TEST
#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test
#endif  // FRIEND_TEST
44
45class Value;
46
47namespace base {
48
49class JSONReader {
50 public:
51  // A struct to hold a JS token.
52  class Token {
53   public:
54    enum Type {
55     OBJECT_BEGIN,           // {
56     OBJECT_END,             // }
57     ARRAY_BEGIN,            // [
58     ARRAY_END,              // ]
59     STRING,
60     NUMBER,
61     BOOL_TRUE,              // true
62     BOOL_FALSE,             // false
63     NULL_TOKEN,             // null
64     LIST_SEPARATOR,         // ,
65     OBJECT_PAIR_SEPARATOR,  // :
66     END_OF_INPUT,
67     INVALID_TOKEN,
68    };
69    Token(Type t, const wchar_t* b, int len)
70      : type(t), begin(b), length(len) {}
71
72    // Get the character that's one past the end of this token.
73    wchar_t NextChar() {
74      return *(begin + length);
75    }
76
77    Type type;
78
79    // A pointer into JSONReader::json_pos_ that's the beginning of this token.
80    const wchar_t* begin;
81
82    // End should be one char past the end of the token.
83    int length;
84  };
85
86  // Error codes during parsing.
87  enum JsonParseError {
88    JSON_NO_ERROR = 0,
89    JSON_BAD_ROOT_ELEMENT_TYPE,
90    JSON_INVALID_ESCAPE,
91    JSON_SYNTAX_ERROR,
92    JSON_TRAILING_COMMA,
93    JSON_TOO_MUCH_NESTING,
94    JSON_UNEXPECTED_DATA_AFTER_ROOT,
95    JSON_UNSUPPORTED_ENCODING,
96    JSON_UNQUOTED_DICTIONARY_KEY,
97  };
98
99  // String versions of parse error codes.
100  static const char* kBadRootElementType;
101  static const char* kInvalidEscape;
102  static const char* kSyntaxError;
103  static const char* kTrailingComma;
104  static const char* kTooMuchNesting;
105  static const char* kUnexpectedDataAfterRoot;
106  static const char* kUnsupportedEncoding;
107  static const char* kUnquotedDictionaryKey;
108
109  JSONReader();
110
111  // Reads and parses |json|, returning a Value. The caller owns the returned
112  // instance. If |json| is not a properly formed JSON string, returns NULL.
113  // If |allow_trailing_comma| is true, we will ignore trailing commas in
114  // objects and arrays even though this goes against the RFC.
115  static Value* Read(const std::string& json, bool allow_trailing_comma);
116
117  // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
118  // are optional. If specified and NULL is returned, they will be populated
119  // an error code and a formatted error message (including error location if
120  // appropriate). Otherwise, they will be unmodified.
121  static Value* ReadAndReturnError(const std::string& json,
122                                   bool allow_trailing_comma,
123                                   int* error_code_out,
124                                   std::string* error_msg_out);
125
126  // Converts a JSON parse error code into a human readable message.
127  // Returns an empty string if error_code is JSON_NO_ERROR.
128  static std::string ErrorCodeToString(JsonParseError error_code);
129
130  // Returns the error code if the last call to JsonToValue() failed.
131  // Returns JSON_NO_ERROR otherwise.
132  JsonParseError error_code() const { return error_code_; }
133
134  // Converts error_code_ to a human-readable string, including line and column
135  // numbers if appropriate.
136  std::string GetErrorMessage() const;
137
138  // Reads and parses |json|, returning a Value. The caller owns the returned
139  // instance. If |json| is not a properly formed JSON string, returns NULL and
140  // a detailed error can be retrieved from |error_message()|.
141  // If |check_root| is true, we require that the root object be an object or
142  // array. Otherwise, it can be any valid JSON type.
143  // If |allow_trailing_comma| is true, we will ignore trailing commas in
144  // objects and arrays even though this goes against the RFC.
145  Value* JsonToValue(const std::string& json, bool check_root,
146                     bool allow_trailing_comma);
147
148 private:
149  FRIEND_TEST(JSONReaderTest, Reading);
150  FRIEND_TEST(JSONReaderTest, ErrorMessages);
151
152  static std::string FormatErrorMessage(int line, int column,
153                                        const std::string& description);
154
155  // Recursively build Value.  Returns NULL if we don't have a valid JSON
156  // string.  If |is_root| is true, we verify that the root element is either
157  // an object or an array.
158  Value* BuildValue(bool is_root);
159
160  // Parses a sequence of characters into a Token::NUMBER. If the sequence of
161  // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
162  // that DecodeNumber is used to actually convert from a string to an
163  // int/double.
164  Token ParseNumberToken();
165
166  // Try and convert the substring that token holds into an int or a double. If
167  // we can (ie., no overflow), return the value, else return NULL.
168  Value* DecodeNumber(const Token& token);
169
170  // Parses a sequence of characters into a Token::STRING. If the sequence of
171  // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
172  // that DecodeString is used to actually decode the escaped string into an
173  // actual wstring.
174  Token ParseStringToken();
175
176  // Convert the substring into a value string.  This should always succeed
177  // (otherwise ParseStringToken would have failed).
178  Value* DecodeString(const Token& token);
179
180  // Grabs the next token in the JSON stream.  This does not increment the
181  // stream so it can be used to look ahead at the next token.
182  Token ParseToken();
183
184  // Increments |json_pos_| past leading whitespace and comments.
185  void EatWhitespaceAndComments();
186
187  // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
188  // false.
189  bool EatComment();
190
191  // Checks if |json_pos_| matches str.
192  bool NextStringMatch(const std::wstring& str);
193
194  // Sets the error code that will be returned to the caller. The current
195  // line and column are determined and added into the final message.
196  void SetErrorCode(const JsonParseError error, const wchar_t* error_pos);
197
198  // Pointer to the starting position in the input string.
199  const wchar_t* start_pos_;
200
201  // Pointer to the current position in the input string.
202  const wchar_t* json_pos_;
203
204  // Used to keep track of how many nested lists/dicts there are.
205  int stack_depth_;
206
207  // A parser flag that allows trailing commas in objects and arrays.
208  bool allow_trailing_comma_;
209
210  // Contains the error code for the last call to JsonToValue(), if any.
211  JsonParseError error_code_;
212  int error_line_;
213  int error_col_;
214
215  DISALLOW_COPY_AND_ASSIGN(JSONReader);
216};
217
218}  // namespace base
219
220#endif  // BASE_JSON_JSON_READER_H_
221