15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A JSON parser. Converts strings of JSON into a Value object (see 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// base/values.h). 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// http://www.ietf.org/rfc/rfc4627.txt?number=4627 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Known limitations/deviations from the RFC: 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Only knows how to parse ints within the range of a signed 32 bit int and 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// decimal numbers within a double. 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (BE or LE) and UTF-32 (BE or LE) as well. 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - We limit nesting to 100 levels to prevent stack overflow (this is allowed 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by the RFC). 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF-8 string for the JSONReader::JsonToValue() function may start with a 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF-8 BOM (0xEF, 0xBB, 0xBF). 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To avoid the function from mis-treating a UTF-8 BOM as an invalid 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// character, the function skips a Unicode BOM at the beginning of the 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Unicode string (converted from the input UTF-8 string) before parsing it. 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO(tc): Add a parsing option to to relax object keys being wrapped in 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// double quotes 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO(tc): Add an option to disable comment stripping 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef BASE_JSON_JSON_READER_H_ 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define BASE_JSON_JSON_READER_H_ 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/base_export.h" 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h" 36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h" 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base { 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Value; 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal { 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class JSONParser; 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base { 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)enum JSONParserOptions { 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parses the input strictly according to RFC 4627, except for where noted 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // above. 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_PARSE_RFC = 0, 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Allows commas to exist after the last element in structures. 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_ALLOW_TRAILING_COMMAS = 1 << 0, 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The parser can perform optimizations by placing hidden data in the root of 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the JSON object, which speeds up certain operations on children. However, 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // if the child is Remove()d from root, it would result in use-after-free 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // unless it is DeepCopy()ed or this option is used. 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_DETACHABLE_CHILDREN = 1 << 1, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_EXPORT JSONReader { 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Error codes during parsing. 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum JsonParseError { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_NO_ERROR = 0, 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_INVALID_ESCAPE, 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_SYNTAX_ERROR, 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_UNEXPECTED_TOKEN, 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_TRAILING_COMMA, 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_TOO_MUCH_NESTING, 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_UNEXPECTED_DATA_AFTER_ROOT, 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_UNSUPPORTED_ENCODING, 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSON_UNQUOTED_DICTIONARY_KEY, 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // String versions of parse error codes. 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kInvalidEscape; 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kSyntaxError; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kUnexpectedToken; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kTrailingComma; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kTooMuchNesting; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kUnexpectedDataAfterRoot; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kUnsupportedEncoding; 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const char* kUnquotedDictionaryKey; 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constructs a reader with the default options, JSON_PARSE_RFC. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JSONReader(); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constructs a reader with custom options. 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit JSONReader(int options); 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~JSONReader(); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Reads and parses |json|, returning a Value. The caller owns the returned 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // instance. If |json| is not a properly formed JSON string, returns NULL. 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static Value* Read(const StringPiece& json); 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Reads and parses |json|, returning a Value owned by the caller. The 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // parser respects the given |options|. If the input is not properly formed, 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // returns NULL. 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static Value* Read(const StringPiece& json, int options); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // are optional. If specified and NULL is returned, they will be populated 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // an error code and a formatted error message (including error location if 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // appropriate). Otherwise, they will be unmodified. 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static Value* ReadAndReturnError(const StringPiece& json, 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options, // JSONParserOptions 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int* error_code_out, 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string* error_msg_out); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Converts a JSON parse error code into a human readable message. 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns an empty string if error_code is JSON_NO_ERROR. 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static std::string ErrorCodeToString(JsonParseError error_code); 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Parses an input string into a Value that is owned by the caller. 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Value* ReadToValue(const std::string& json); 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the error code if the last call to ReadToValue() failed. 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns JSON_NO_ERROR otherwise. 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) JsonParseError error_code() const; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Converts error_code_ to a human-readable string, including line and column 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // numbers if appropriate. 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string GetErrorMessage() const; 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<internal::JSONParser> parser_; 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace base 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // BASE_JSON_JSON_READER_H_ 136