// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// A JSON parser.  Converts strings of JSON into a Value object (see
// base/values.h).
// http://www.ietf.org/rfc/rfc4627.txt?number=4627
//
// Known limitations/deviations from the RFC:
// - Only knows how to parse ints within the range of a signed 32 bit int and
//   decimal numbers within a double.
// - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
//   (BE or LE) and UTF-32 (BE or LE) as well.
// - We limit nesting to 100 levels to prevent stack overflow (this is allowed
//   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") notes that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To keep the function from mis-treating a UTF-8 BOM as an invalid
//   character, the function skips a Unicode BOM at the beginning of the
//   Unicode string (converted from the input UTF-8 string) before parsing it.
//
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
// TODO(tc): Add an option to disable comment stripping
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef BASE_JSON_JSON_READER_H_
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define BASE_JSON_JSON_READER_H_
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/base_export.h"
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h"
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h"
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Forward declarations so this header does not need the full definitions of
// the value type or of the parser implementation.
namespace base {
class Value;

namespace internal {
class JSONParser;
}  // namespace internal
}  // namespace base
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base {
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Options controlling parser behavior. Apart from JSON_PARSE_RFC (zero, i.e.
// no option set), each enumerator is a distinct bit so that several options
// can be OR-ed together into the |options| integer accepted by JSONReader.
enum JSONParserOptions {
  // Parses the input strictly according to RFC 4627, except for where noted
  // above.
  JSON_PARSE_RFC = 0,

  // Allows commas to exist after the last element in structures.
  JSON_ALLOW_TRAILING_COMMAS = 1 << 0,

  // The parser can perform optimizations by placing hidden data in the root of
  // the JSON object, which speeds up certain operations on children. However,
  // if the child is Remove()d from root, it would result in use-after-free
  // unless it is DeepCopy()ed or this option is used.
  JSON_DETACHABLE_CHILDREN = 1 << 1,
};
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_EXPORT JSONReader {
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Error codes during parsing.
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum JsonParseError {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_NO_ERROR = 0,
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_INVALID_ESCAPE,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_SYNTAX_ERROR,
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_UNEXPECTED_TOKEN,
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_TRAILING_COMMA,
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_TOO_MUCH_NESTING,
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_UNEXPECTED_DATA_AFTER_ROOT,
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_UNSUPPORTED_ENCODING,
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    JSON_UNQUOTED_DICTIONARY_KEY,
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // String versions of parse error codes.
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kInvalidEscape;
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kSyntaxError;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kUnexpectedToken;
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kTrailingComma;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kTooMuchNesting;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kUnexpectedDataAfterRoot;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kUnsupportedEncoding;
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const char* kUnquotedDictionaryKey;
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructs a reader with the default options, JSON_PARSE_RFC.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  JSONReader();
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constructs a reader with custom options.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit JSONReader(int options);
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~JSONReader();
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reads and parses |json|, returning a Value. The caller owns the returned
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instance. If |json| is not a properly formed JSON string, returns NULL.
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Value* Read(const StringPiece& json);
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reads and parses |json|, returning a Value owned by the caller. The
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // parser respects the given |options|. If the input is not properly formed,
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // returns NULL.
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Value* Read(const StringPiece& json, int options);
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // are optional. If specified and NULL is returned, they will be populated
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // an error code and a formatted error message (including error location if
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // appropriate). Otherwise, they will be unmodified.
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Value* ReadAndReturnError(const StringPiece& json,
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   int options,  // JSONParserOptions
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   int* error_code_out,
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   std::string* error_msg_out);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Converts a JSON parse error code into a human readable message.
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns an empty string if error_code is JSON_NO_ERROR.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static std::string ErrorCodeToString(JsonParseError error_code);
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Parses an input string into a Value that is owned by the caller.
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Value* ReadToValue(const std::string& json);
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the error code if the last call to ReadToValue() failed.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns JSON_NO_ERROR otherwise.
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  JsonParseError error_code() const;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Converts error_code_ to a human-readable string, including line and column
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // numbers if appropriate.
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string GetErrorMessage() const;
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<internal::JSONParser> parser_;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace base
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // BASE_JSON_JSON_READER_H_
136