json_reader.h revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A JSON parser.  Converts strings of JSON into a Value object (see
// base/values.h).
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
//
// Known limitations/deviations from the RFC:
// - Only knows how to parse ints within the range of a signed 32 bit int and
//   decimal numbers within a double.
// - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
//   (BE or LE) and UTF-32 (BE or LE) as well.
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
//   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from treating a UTF-8 BOM as an invalid
//   character, the function skips a Unicode BOM at the beginning of the
//   Unicode string (converted from the input UTF-8 string) before parsing it.
//
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
// TODO(tc): Add an option to disable comment stripping
// TODO(aa): Consider making the constructor public and the static Read() method
//   only a convenience for the common uses with more complex configuration going
//   on the instance.

#ifndef BASE_JSON_JSON_READER_H_
#define BASE_JSON_JSON_READER_H_
#pragma once

#include <string>

#include "base/basictypes.h"

// Chromium and Chromium OS check out gtest to different places, so we're
// unable to compile on both if we include gtest_prod.h here.  Instead, include
// its only contents -- this will need to be updated if the macro ever changes.
42#define FRIEND_TEST(test_case_name, test_name)\ 43friend class test_case_name##_##test_name##_Test 44 45class Value; 46 47namespace base { 48 49class JSONReader { 50 public: 51 // A struct to hold a JS token. 52 class Token { 53 public: 54 enum Type { 55 OBJECT_BEGIN, // { 56 OBJECT_END, // } 57 ARRAY_BEGIN, // [ 58 ARRAY_END, // ] 59 STRING, 60 NUMBER, 61 BOOL_TRUE, // true 62 BOOL_FALSE, // false 63 NULL_TOKEN, // null 64 LIST_SEPARATOR, // , 65 OBJECT_PAIR_SEPARATOR, // : 66 END_OF_INPUT, 67 INVALID_TOKEN, 68 }; 69 Token(Type t, const wchar_t* b, int len) 70 : type(t), begin(b), length(len) {} 71 72 // Get the character that's one past the end of this token. 73 wchar_t NextChar() { 74 return *(begin + length); 75 } 76 77 Type type; 78 79 // A pointer into JSONReader::json_pos_ that's the beginning of this token. 80 const wchar_t* begin; 81 82 // End should be one char past the end of the token. 83 int length; 84 }; 85 86 // Error codes during parsing. 87 enum JsonParseError { 88 JSON_NO_ERROR = 0, 89 JSON_BAD_ROOT_ELEMENT_TYPE, 90 JSON_INVALID_ESCAPE, 91 JSON_SYNTAX_ERROR, 92 JSON_TRAILING_COMMA, 93 JSON_TOO_MUCH_NESTING, 94 JSON_UNEXPECTED_DATA_AFTER_ROOT, 95 JSON_UNSUPPORTED_ENCODING, 96 JSON_UNQUOTED_DICTIONARY_KEY, 97 }; 98 99 // String versions of parse error codes. 100 static const char* kBadRootElementType; 101 static const char* kInvalidEscape; 102 static const char* kSyntaxError; 103 static const char* kTrailingComma; 104 static const char* kTooMuchNesting; 105 static const char* kUnexpectedDataAfterRoot; 106 static const char* kUnsupportedEncoding; 107 static const char* kUnquotedDictionaryKey; 108 109 JSONReader(); 110 111 // Reads and parses |json|, returning a Value. The caller owns the returned 112 // instance. If |json| is not a properly formed JSON string, returns NULL. 113 // If |allow_trailing_comma| is true, we will ignore trailing commas in 114 // objects and arrays even though this goes against the RFC. 
115 static Value* Read(const std::string& json, bool allow_trailing_comma); 116 117 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| 118 // are optional. If specified and NULL is returned, they will be populated 119 // an error code and a formatted error message (including error location if 120 // appropriate). Otherwise, they will be unmodified. 121 static Value* ReadAndReturnError(const std::string& json, 122 bool allow_trailing_comma, 123 int* error_code_out, 124 std::string* error_msg_out); 125 126 // Converts a JSON parse error code into a human readable message. 127 // Returns an empty string if error_code is JSON_NO_ERROR. 128 static std::string ErrorCodeToString(JsonParseError error_code); 129 130 // Returns the error code if the last call to JsonToValue() failed. 131 // Returns JSON_NO_ERROR otherwise. 132 JsonParseError error_code() const { return error_code_; } 133 134 // Converts error_code_ to a human-readable string, including line and column 135 // numbers if appropriate. 136 std::string GetErrorMessage() const; 137 138 // Reads and parses |json|, returning a Value. The caller owns the returned 139 // instance. If |json| is not a properly formed JSON string, returns NULL and 140 // a detailed error can be retrieved from |error_message()|. 141 // If |check_root| is true, we require that the root object be an object or 142 // array. Otherwise, it can be any valid JSON type. 143 // If |allow_trailing_comma| is true, we will ignore trailing commas in 144 // objects and arrays even though this goes against the RFC. 145 Value* JsonToValue(const std::string& json, bool check_root, 146 bool allow_trailing_comma); 147 148 private: 149 FRIEND_TEST(JSONReaderTest, Reading); 150 FRIEND_TEST(JSONReaderTest, ErrorMessages); 151 152 static std::string FormatErrorMessage(int line, int column, 153 const std::string& description); 154 155 // Recursively build Value. Returns NULL if we don't have a valid JSON 156 // string. 
If |is_root| is true, we verify that the root element is either 157 // an object or an array. 158 Value* BuildValue(bool is_root); 159 160 // Parses a sequence of characters into a Token::NUMBER. If the sequence of 161 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note 162 // that DecodeNumber is used to actually convert from a string to an 163 // int/double. 164 Token ParseNumberToken(); 165 166 // Try and convert the substring that token holds into an int or a double. If 167 // we can (ie., no overflow), return the value, else return NULL. 168 Value* DecodeNumber(const Token& token); 169 170 // Parses a sequence of characters into a Token::STRING. If the sequence of 171 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note 172 // that DecodeString is used to actually decode the escaped string into an 173 // actual wstring. 174 Token ParseStringToken(); 175 176 // Convert the substring into a value string. This should always succeed 177 // (otherwise ParseStringToken would have failed). 178 Value* DecodeString(const Token& token); 179 180 // Grabs the next token in the JSON stream. This does not increment the 181 // stream so it can be used to look ahead at the next token. 182 Token ParseToken(); 183 184 // Increments |json_pos_| past leading whitespace and comments. 185 void EatWhitespaceAndComments(); 186 187 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns 188 // false. 189 bool EatComment(); 190 191 // Checks if |json_pos_| matches str. 192 bool NextStringMatch(const std::wstring& str); 193 194 // Sets the error code that will be returned to the caller. The current 195 // line and column are determined and added into the final message. 196 void SetErrorCode(const JsonParseError error, const wchar_t* error_pos); 197 198 // Pointer to the starting position in the input string. 199 const wchar_t* start_pos_; 200 201 // Pointer to the current position in the input string. 
202 const wchar_t* json_pos_; 203 204 // Used to keep track of how many nested lists/dicts there are. 205 int stack_depth_; 206 207 // A parser flag that allows trailing commas in objects and arrays. 208 bool allow_trailing_comma_; 209 210 // Contains the error code for the last call to JsonToValue(), if any. 211 JsonParseError error_code_; 212 int error_line_; 213 int error_col_; 214 215 DISALLOW_COPY_AND_ASSIGN(JSONReader); 216}; 217 218} // namespace base 219 220#endif // BASE_JSON_JSON_READER_H_ 221