1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef BASE_JSON_JSON_PARSER_H_
6#define BASE_JSON_JSON_PARSER_H_
7
8#include <stddef.h>
9#include <stdint.h>
10
11#include <memory>
12#include <string>
13
14#include "base/base_export.h"
15#include "base/compiler_specific.h"
16#include "base/gtest_prod_util.h"
17#include "base/json/json_reader.h"
18#include "base/macros.h"
19#include "base/strings/string_piece.h"
20
21namespace base {
22
23class Value;
24
25namespace internal {
26
27class JSONParserTest;
28
29// The implementation behind the JSONReader interface. This class is not meant
30// to be used directly; it encapsulates logic that need not be exposed publicly.
31//
32// This parser guarantees O(n) time through the input string. It also optimizes
33// base::StringValue by using StringPiece where possible when returning Value
34// objects by using "hidden roots," discussed in the implementation.
35//
36// Iteration happens on the byte level, with the functions CanConsume and
37// NextChar. The conversion from byte to JSON token happens in
38// GetNextToken/ParseToken without advancing the parser; that is, tokenization
39// operates on the current parser position.
40//
41// Built on top of these are a family of Consume functions that iterate
42// internally. Invariant: on entry of a Consume function, the parser is wound
43// to the first byte of a valid JSON token. On exit, it is on the last byte
44// of a token, such that the next iteration of the parser will be at the byte
45// immediately following the token, which would likely be the first byte of the
46// next token.
47class BASE_EXPORT JSONParser {
48 public:
  // |options| holds the base::JSONParserOptions that control parsing.
49  explicit JSONParser(int options);
50  ~JSONParser();
51
52  // Parses the input string according to the set options and returns the
53  // result as a Value.
54  // Wrap this in base::FooValue::From() to check the Value is of type Foo and
55  // convert to a FooValue at the same time.
  // NOTE(review): string values may be backed by a StringPiece into |input|
  // (see StringBuilder below); whether |input| must outlive the returned Value
  // is not visible from this header -- confirm against the implementation.
56  std::unique_ptr<Value> Parse(StringPiece input);
57
58  // Returns the error code.
59  JSONReader::JsonParseError error_code() const;
60
61  // Returns the human-friendly error message.
62  std::string GetErrorMessage() const;
63
64  // Returns the error line number if parse error happened. Otherwise always
65  // returns 0.
66  int error_line() const;
67
68  // Returns the error column number if parse error happened. Otherwise always
69  // returns 0.
70  int error_column() const;
71
72 private:
  // The kinds of token the parser distinguishes. GetNextToken() returns one of
  // these and ParseToken() takes one as its argument.
73  enum Token {
74    T_OBJECT_BEGIN,           // {
75    T_OBJECT_END,             // }
76    T_ARRAY_BEGIN,            // [
77    T_ARRAY_END,              // ]
78    T_STRING,
79    T_NUMBER,
80    T_BOOL_TRUE,              // true
81    T_BOOL_FALSE,             // false
82    T_NULL,                   // null
83    T_LIST_SEPARATOR,         // ,
84    T_OBJECT_PAIR_SEPARATOR,  // :
85    T_END_OF_INPUT,
86    T_INVALID_TOKEN,
87  };
88
89  // A helper class used for parsing strings. One optimization performed is to
90  // create base::Value with a StringPiece to avoid unnecessary std::string
91  // copies. This is not possible if the input string needs to be decoded from
92  // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
93  // This class centralizes that logic.
94  class StringBuilder {
95   public:
96    // Empty constructor. Used for creating a builder with which to Swap().
97    StringBuilder();
98
99    // |pos| is the beginning of an input string, excluding the |"|.
100    explicit StringBuilder(const char* pos);
101
102    ~StringBuilder();
103
104    // Swaps the contents of |other| with this.
105    void Swap(StringBuilder* other);
106
107    // Either increases the |length_| of the string or copies the character if
108    // the StringBuilder has been converted. |c| must be in the basic ASCII
109    // plane; all other characters need to be in UTF-8 units, appended with
110    // AppendString below.
111    void Append(const char& c);
112
113    // Appends a string to the std::string. Must be Convert()ed to use.
114    void AppendString(const std::string& str);
115
116    // Converts the builder from its default StringPiece to a full std::string,
117    // performing a copy. Once a builder is converted, it cannot be made a
118    // StringPiece again.
119    void Convert();
120
121    // Returns whether the builder can be converted to a StringPiece.
122    bool CanBeStringPiece() const;
123
124    // Returns the StringPiece representation. Returns an empty piece if it
125    // cannot be converted.
126    StringPiece AsStringPiece();
127
128    // Returns the builder as a std::string.
129    const std::string& AsString();
130
131   private:
132    // The beginning of the input string.
133    const char* pos_;
134
135    // Number of bytes in |pos_| that make up the string being built.
136    size_t length_;
137
138    // The copied string representation. NULL until Convert() is called.
139    // Strong (owning) pointer; a raw pointer is used on purpose because
    // std::unique_ptr<T> has too much of an overhead here.
    // NOTE(review): presumably released by ~StringBuilder() -- confirm in the
    // implementation file.
140    std::string* string_;
141  };
142
143  // Quick check that the stream has capacity to consume |length| more bytes.
144  bool CanConsume(int length);
145
146  // The basic way to consume a single character in the stream. Consumes one
147  // byte of the input stream and returns a pointer to the rest of it.
148  const char* NextChar();
149
150  // Performs the equivalent of NextChar N times.
151  void NextNChars(int n);
152
153  // Skips over whitespace and comments to find the next token in the stream.
154  // This does not advance the parser for non-whitespace or comment chars.
155  Token GetNextToken();
156
157  // Consumes whitespace characters and comments until a character that is
158  // neither is encountered.
159  void EatWhitespaceAndComments();
160  // Helper function that consumes a comment, assuming that the parser is
161  // currently wound to a '/'.
162  bool EatComment();
163
164  // Calls GetNextToken() and then ParseToken(). Caller owns the result.
165  Value* ParseNextToken();
166
167  // Takes a token that represents the start of a Value ("a structural token"
168  // in RFC terms) and consumes it, returning the result as an object the
169  // caller owns.
170  Value* ParseToken(Token token);
171
  // NOTE(review): the Consume* functions below return raw Value pointers
  // without stating ownership; presumably the caller owns the result, as with
  // ParseToken() above -- confirm in the implementation file.

172  // Assuming that the parser is currently wound to '{', this parses a JSON
173  // object into a DictionaryValue.
174  Value* ConsumeDictionary();
175
176  // Assuming that the parser is wound to '[', this parses a JSON list into a
177  // ListValue.
178  Value* ConsumeList();
179
180  // Calls through ConsumeStringRaw and wraps it in a value.
181  Value* ConsumeString();
182
183  // Assuming that the parser is wound to a double quote, this parses a string,
184  // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
185  // success and Swap()s the result into |out|. Returns false on failure with
186  // error information set.
187  bool ConsumeStringRaw(StringBuilder* out);
188  // Helper function for ConsumeStringRaw() that consumes the next four or 10
189  // bytes (parser is wound to the first character of a HEX sequence, with the
190  // potential for consuming another \uXXXX for a surrogate). Returns true on
191  // success and places the UTF8 code units in |dest_string|, and false on
192  // failure.
193  bool DecodeUTF16(std::string* dest_string);
194  // Helper function for ConsumeStringRaw() that takes a single code point,
195  // decodes it into UTF-8 units, and appends it to the given builder. The
196  // point must be valid.
197  void DecodeUTF8(const int32_t& point, StringBuilder* dest);
198
199  // Assuming that the parser is wound to the start of a valid JSON number,
200  // this parses and converts it to either an int or double value.
201  Value* ConsumeNumber();
202  // Helper that reads characters that are ints. Returns true if a number was
203  // read and false on error.
204  bool ReadInt(bool allow_leading_zeros);
205
206  // Consumes the literal values of |true|, |false|, and |null|, assuming the
207  // parser is wound to the first character of any of those.
208  Value* ConsumeLiteral();
209
210  // Compares two string buffers of a given length.
211  static bool StringsAreEqual(const char* left, const char* right, size_t len);
212
213  // Sets the error information to |code| at the current column, based on
214  // |index_| and |index_last_line_|, with an optional positive/negative
215  // adjustment by |column_adjust|.
216  void ReportError(JSONReader::JsonParseError code, int column_adjust);
217
218  // Given the line and column number of an error, formats one of the error
219  // message constants from json_reader.h for human display.
220  static std::string FormatErrorMessage(int line, int column,
221                                        const std::string& description);
222
223  // base::JSONParserOptions that control parsing.
224  const int options_;
225
226  // Pointer to the start of the input data.
227  const char* start_pos_;
228
229  // Pointer to the current position in the input data. Equivalent to
230  // |start_pos_ + index_|.
231  const char* pos_;
232
233  // Pointer to the last character of the input data.
234  const char* end_pos_;
235
236  // The index in the input stream to which the parser is wound.
237  int index_;
238
239  // The number of times the parser has recursed (current stack depth).
240  int stack_depth_;
241
242  // The line number that the parser is at currently.
243  int line_number_;
244
245  // The last value of |index_| on the previous line.
246  int index_last_line_;
247
248  // Error information; presumably what error_code(), error_line() and
  // error_column() report -- confirm in the implementation file.
249  JSONReader::JsonParseError error_code_;
250  int error_line_;
251  int error_column_;
252
  // The unit tests are befriended so that they can reach the private parsing
  // helpers declared above.
253  friend class JSONParserTest;
254  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
255  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
256  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
257  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
258  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
259  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
260  FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
261
262  DISALLOW_COPY_AND_ASSIGN(JSONParser);
263};
264
265}  // namespace internal
266}  // namespace base
267
268#endif  // BASE_JSON_JSON_PARSER_H_
269