// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 31b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/util/internal/json_stream_parser.h> 32b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 33b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <algorithm> 34b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <cctype> 35b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <cerrno> 36b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <cstdlib> 37b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <cstring> 38b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <memory> 39b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#ifndef _SHARED_PTR_H 40b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/stubs/shared_ptr.h> 41b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#endif 42b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 43b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/stubs/logging.h> 44b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/stubs/common.h> 45b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/util/internal/object_writer.h> 46b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/util/internal/json_escaping.h> 47b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer#include <google/protobuf/stubs/strutil.h> 48b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 49b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammernamespace google { 50b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammernamespace protobuf { 51b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammernamespace util { 52b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 53b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// Allow these symbols to be referenced as 
util::Status, util::error::* in 54b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// this file. 55b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerusing util::Status; 56b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammernamespace error { 57b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerusing util::error::INTERNAL; 58b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerusing util::error::INVALID_ARGUMENT; 59b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} // namespace error 60b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 61b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammernamespace converter { 62b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 63b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X) 64b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic const int kUnicodeEscapedLength = 6; 65b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 66b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// Length of the true, false, and null literals. 
67b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic const int true_len = strlen("true"); 68b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic const int false_len = strlen("false"); 69b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic const int null_len = strlen("null"); 70b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 71b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerinline bool IsLetter(char c) { 72b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || 73b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer (c == '$'); 74b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 75b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 76b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerinline bool IsAlphanumeric(char c) { 77b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return IsLetter(c) || ('0' <= c && c <= '9'); 78b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 79b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 80b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic bool ConsumeKey(StringPiece* input, StringPiece* key) { 81b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (input->empty() || !IsLetter((*input)[0])) return false; 82b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int len = 1; 83b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer for (; len < input->size(); ++len) { 84b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!IsAlphanumeric((*input)[len])) { 85b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 86b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 87b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 88b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer *key = StringPiece(input->data(), len); 89b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer *input = StringPiece(input->data() + len, 
input->size() - len); 90b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return true; 91b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 92b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 93b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerstatic bool MatchKey(StringPiece input) { 94b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return !input.empty() && IsLetter(input[0]); 95b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 96b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 97b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas BerghammerJsonStreamParser::JsonStreamParser(ObjectWriter* ow) 98b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer : ow_(ow), 99b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_(), 100b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer leftover_(), 101b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer json_(), 102b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_(), 103b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_(), 104b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_storage_(), 105b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer finishing_(false), 106b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_(), 107b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_(), 108b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string_open_(0), 109b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer chunk_storage_(), 110b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer coerce_to_utf8_(false) { 111b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Initialize the stack with a single value to be parsed. 
112b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(VALUE); 113b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 114b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 115b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas BerghammerJsonStreamParser::~JsonStreamParser() {} 116b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 117b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 118b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::Parse(StringPiece json) { 119b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer StringPiece chunk = json; 120b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we have leftovers from a previous chunk, append the new chunk to it 121b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // and create a new StringPiece pointing at the string's data. This could 122b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // be large but we rely on the chunks to be small, assuming they are 123b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // fragments of a Cord. 124b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!leftover_.empty()) { 125b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Don't point chunk to leftover_ because leftover_ will be updated in 126b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // ParseChunk(chunk). 127b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer chunk_storage_.swap(leftover_); 128b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer json.AppendToString(&chunk_storage_); 129b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer chunk = StringPiece(chunk_storage_); 130b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 131b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 132b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Find the structurally valid UTF8 prefix and parse only that. 
133b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int n = internal::UTF8SpnStructurallyValid(chunk); 134b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (n > 0) { 135b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status status = ParseChunk(chunk.substr(0, n)); 136b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 137b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Any leftover characters are stashed in leftover_ for later parsing when 138b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // there is more data available. 139b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer chunk.substr(n).AppendToString(&leftover_); 140b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return status; 141b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 142b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer chunk.CopyToString(&leftover_); 143b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 144b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 145b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 146b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 147b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::FinishParse() { 148b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we do not expect anything and there is nothing left to parse we're all 149b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // done. 150b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (stack_.empty() && leftover_.empty()) { 151b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 152b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 153b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 154b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Storage for UTF8-coerced string. 
155b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer google::protobuf::scoped_array<char> utf8; 156b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (coerce_to_utf8_) { 157b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer utf8.reset(new char[leftover_.size()]); 158b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' '); 159b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_ = json_ = StringPiece(coerced, leftover_.size()); 160b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 161b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_ = json_ = leftover_; 162b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!internal::IsStructurallyValidUTF8(leftover_)) { 163b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Encountered non UTF-8 code points."); 164b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 165b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 166b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 167b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Parse the remainder in finishing mode, which reports errors for things like 168b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // unterminated strings or unknown tokens that would normally be retried. 
169b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer finishing_ = true; 170b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = RunParser(); 171b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result.ok()) { 172b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer SkipWhitespace(); 173b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!p_.empty()) { 174b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ReportFailure("Parsing terminated before end of input."); 175b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 176b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 177b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 178b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 179b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 180b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseChunk(StringPiece chunk) { 181b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Do not do any work if the chunk is empty. 182b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (chunk.empty()) return util::Status::OK; 183b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 184b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_ = json_ = chunk; 185b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 186b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer finishing_ = false; 187b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = RunParser(); 188b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!result.ok()) return result; 189b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 190b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer SkipWhitespace(); 191b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (p_.empty()) { 192b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we parsed everything we had, clear the leftover. 
193b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer leftover_.clear(); 194b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 195b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we do not expect anything i.e. stack is empty, and we have non-empty 196b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // string left to parse, we report an error. 197b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (stack_.empty()) { 198b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Parsing terminated before end of input."); 199b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 200b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we expect future data i.e. stack is non-empty, and we have some 201b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // unparsed data left, we save it for later parse. 202b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer leftover_ = p_.ToString(); 203b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 204b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 205b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 206b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 207b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::RunParser() { 208b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer while (!stack_.empty()) { 209b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ParseType type = stack_.top(); 210b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer TokenType t = (string_open_ == 0) ? 
GetNextTokenType() : BEGIN_STRING; 211b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.pop(); 212b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result; 213b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer switch (type) { 214b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case VALUE: 215b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseValue(t); 216b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 217b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 218b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case OBJ_MID: 219b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseObjectMid(t); 220b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 221b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 222b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case ENTRY: 223b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseEntry(t); 224b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 225b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 226b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case ENTRY_MID: 227b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseEntryMid(t); 228b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 229b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 230b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case ARRAY_VALUE: 231b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseArrayValue(t); 232b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 233b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 234b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case ARRAY_MID: 235b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseArrayMid(t); 236b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 237b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 
238b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer default: 239b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = util::Status(util::error::INTERNAL, 240b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer StrCat("Unknown parse type: ", type)); 241b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 242b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 243b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!result.ok()) { 244b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we were cancelled, save our state and try again later. 245b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_ && result == util::Status::CANCELLED) { 246b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(type); 247b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we have a key we still need to render, make sure to save off the 248b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // contents in our own storage. 
249b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!key_.empty() && key_storage_.empty()) { 250b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.AppendToString(&key_storage_); 251b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_ = StringPiece(key_storage_); 252b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 253b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = util::Status::OK; 254b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 255b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 256b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 257b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 258b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 259b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 260b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 261b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseValue(TokenType type) { 262b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer switch (type) { 263b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_OBJECT: 264b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return HandleBeginObject(); 265b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_ARRAY: 266b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return HandleBeginArray(); 267b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_STRING: 268b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ParseString(); 269b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_NUMBER: 270b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ParseNumber(); 271b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_TRUE: 272b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ParseTrue(); 273b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_FALSE: 
274b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ParseFalse(); 275b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case BEGIN_NULL: 276b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ParseNull(); 277b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case UNKNOWN: 278b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected a value."); 279b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer default: { 280b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Special case for having been cut off while parsing, wait for more data. 281b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // This handles things like 'fals' being at the end of the string, we 282b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // don't know if the next char would be e, completing it, or something 283b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // else, making it invalid. 284b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_ && p_.length() < false_len) { 285b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 286b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 287b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Unexpected token."); 288b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 289b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 290b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 291b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 292b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseString() { 293b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = ParseStringHelper(); 294b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result.ok()) { 295b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderString(key_, parsed_); 
296b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 297b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_.clear(); 298b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.clear(); 299b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 300b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 301b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 302b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 303b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseStringHelper() { 304b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we haven't seen the start quote, grab it and remember it for later. 305b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (string_open_ == 0) { 306b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string_open_ = *p_.data(); 307b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\''); 308b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 309b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 310b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Track where we last copied data from so we can minimize copying. 311b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* last = p_.data(); 312b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer while (!p_.empty()) { 313b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* data = p_.data(); 314b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '\\') { 315b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // We're about to handle an escape, copy all bytes from last to data. 
316b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (last < data) { 317b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.append(last, data - last); 318b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer last = data; 319b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 320b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we ran out of string after the \, cancel or report an error 321b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // depending on if we expect more data later. 322b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (p_.length() == 1) { 323b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_) { 324b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 325b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 326b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Closing quote expected in string."); 327b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 328b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Parse a unicode escape if we found \u in the string. 329b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (data[1] == 'u') { 330b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = ParseUnicodeEscape(); 331b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!result.ok()) { 332b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 333b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 334b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Move last pointer past the unicode escape and continue. 
335b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer last = p_.data(); 336b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer continue; 337b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 338b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Handle the standard set of backslash-escaped characters. 339b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer switch (data[1]) { 340b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 'b': 341b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\b'); 342b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 343b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 'f': 344b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\f'); 345b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 346b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 'n': 347b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\n'); 348b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 349b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 'r': 350b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\r'); 351b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 352b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 't': 353b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\t'); 354b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 355b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case 'v': 356b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back('\v'); 357b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 358b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer default: 359b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.push_back(data[1]); 
360b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 361b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // We handled two characters, so advance past them and continue. 362b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(2); 363b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer last = p_.data(); 364b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer continue; 365b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 366b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we found the closing quote note it, advance past it, and return. 367b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == string_open_) { 368b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we didn't copy anything, reuse the input buffer. 369b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (parsed_storage_.empty()) { 370b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_ = StringPiece(last, data - last); 371b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 372b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (last < data) { 373b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.append(last, data - last); 374b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer last = data; 375b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 376b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_ = StringPiece(parsed_storage_); 377b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 378b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Clear the quote char so next time we try to parse a string we'll 379b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // start fresh. 
380b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string_open_ = 0; 381b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 382b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 383b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 384b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Normal character, just advance past it. 385b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 386b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 387b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we ran out of characters, copy over what we have so far. 388b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (last < p_.data()) { 389b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.append(last, p_.data() - last); 390b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 391b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we didn't find the closing quote but we expect more data, cancel for now 392b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_) { 393b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 394b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 395b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // End of string reached without a closing quote, report an error. 396b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string_open_ = 0; 397b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Closing quote expected in string."); 398b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 399b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 400b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// Converts a unicode escaped character to a decimal value stored in a char32 401b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// for use in UTF8 encoding utility. 
We assume that str begins with \uhhhh and 402b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// convert that from the hex number to a decimal value. 403b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// 404b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// There are some security exploits with UTF-8 that we should be careful of: 405b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit 406b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer// - http://sites/intl-eng/design-guide/core-application 407b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseUnicodeEscape() { 408b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (p_.length() < kUnicodeEscapedLength) { 409b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_) { 410b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 411b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 412b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Illegal hex string."); 413b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 414b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer GOOGLE_DCHECK_EQ('\\', p_.data()[0]); 415b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer GOOGLE_DCHECK_EQ('u', p_.data()[1]); 416b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer uint32 code = 0; 417b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer for (int i = 2; i < kUnicodeEscapedLength; ++i) { 418b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!isxdigit(p_.data()[i])) { 419b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Invalid escape sequence."); 420b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 421b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer code = (code << 4) + hex_digit_to_int(p_.data()[i]); 
422b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 423b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (code >= JsonEscaping::kMinHighSurrogate && 424b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer code <= JsonEscaping::kMaxHighSurrogate) { 425b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (p_.length() < 2 * kUnicodeEscapedLength) { 426b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_) { 427b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 428b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 429b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!coerce_to_utf8_) { 430b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Missing low surrogate."); 431b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 432b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else if (p_.data()[kUnicodeEscapedLength] == '\\' && 433b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.data()[kUnicodeEscapedLength + 1] == 'u') { 434b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer uint32 low_code = 0; 435b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength; 436b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ++i) { 437b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!isxdigit(p_.data()[i])) { 438b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Invalid escape sequence."); 439b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 440b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]); 441b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 442b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (low_code >= JsonEscaping::kMinLowSurrogate && 443b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer low_code <= 
JsonEscaping::kMaxLowSurrogate) { 444b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint. 445b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) + 446b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer JsonEscaping::kMinSupplementaryCodePoint; 447b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Advance past the first code unit escape. 448b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(kUnicodeEscapedLength); 449b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else if (!coerce_to_utf8_) { 450b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Invalid low surrogate."); 451b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 452b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else if (!coerce_to_utf8_) { 453b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Missing low surrogate."); 454b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 455b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 456b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!coerce_to_utf8_ && !IsValidCodePoint(code)) { 457b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Invalid unicode code point."); 458b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 459b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer char buf[UTFmax]; 460b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int len = EncodeAsUTF8Char(code, buf); 461b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Advance past the [final] code unit escape. 
462b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(kUnicodeEscapedLength); 463b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.append(buf, len); 464b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 465b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 466b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 467b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseNumber() { 468b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer NumberResult number; 469b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = ParseNumberHelper(&number); 470b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result.ok()) { 471b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer switch (number.type) { 472b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case NumberResult::DOUBLE: 473b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderDouble(key_, number.double_val); 474b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 475b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 476b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 477b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case NumberResult::INT: 478b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderInt64(key_, number.int_val); 479b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 480b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 481b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 482b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer case NumberResult::UINT: 483b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderUint64(key_, number.uint_val); 484b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 485b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 
486b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 487b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer default: 488b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Unable to parse number."); 489b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 490b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 491b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 492b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 493b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 494b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { 495b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* data = p_.data(); 496b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int length = p_.length(); 497b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 498b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Look for the first non-numeric character, or the end of the string. 499b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int index = 0; 500b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer bool floating = false; 501b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer bool negative = data[index] == '-'; 502b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Find the first character that cannot be part of the number. Along the way 503b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // detect if the number needs to be parsed as a double. 504b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Note that this restricts numbers to the JSON specification, so for example 505b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // we do not support hex or octal notations. 
506b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer for (; index < length; ++index) { 507b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer char c = data[index]; 508b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (isdigit(c)) continue; 509b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (c == '.' || c == 'e' || c == 'E') { 510b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer floating = true; 511b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer continue; 512b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 513b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (c == '+' || c == '-' || c == 'x') continue; 514b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Not a valid number character, break out. 515b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer break; 516b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 517b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 518b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If the entire input is a valid number, and we may have more content in the 519b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // future, we abort for now and resume when we know more. 520b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (index == length && !finishing_) { 521b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 522b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 523b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 524b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Create a string containing just the number, so we can use safe_strtoX 525b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string number = p_.substr(0, index).ToString(); 526b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 527b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Floating point number, parse as a double. 
528b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (floating) { 529b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!safe_strtod(number, &result->double_val)) { 530b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Unable to parse number."); 531b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 532b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result->type = NumberResult::DOUBLE; 533b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(index); 534b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 535b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 536b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 537b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Positive non-floating point number, parse as a uint64. 538b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!negative) { 539b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Octal/Hex numbers are not valid JSON values. 
540b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (number.length() >= 2 && number[0] == '0') { 541b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Octal/hex numbers are not valid JSON values."); 542b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 543b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!safe_strtou64(number, &result->uint_val)) { 544b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Unable to parse number."); 545b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 546b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result->type = NumberResult::UINT; 547b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(index); 548b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 549b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 550b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 551b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Octal/Hex numbers are not valid JSON values. 552b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (number.length() >= 3 && number[1] == '0') { 553b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Octal/hex numbers are not valid JSON values."); 554b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 555b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Negative non-floating point number, parse as an int64. 
556b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!safe_strto64(number, &result->int_val)) { 557b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Unable to parse number."); 558b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 559b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result->type = NumberResult::INT; 560b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(index); 561b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 562b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 563b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 564b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::HandleBeginObject() { 565b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer GOOGLE_DCHECK_EQ('{', *p_.data()); 566b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 567b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->StartObject(key_); 568b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 569b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ENTRY); 570b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 571b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 572b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 573b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseObjectMid(TokenType type) { 574b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == UNKNOWN) { 575b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected , or } after key:value pair."); 576b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 577b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 578b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Object is complete, advance past the comma and render the EndObject. 
579b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == END_OBJECT) { 580b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 581b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->EndObject(); 582b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 583b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 584b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Found a comma, advance past it and get ready for an entry. 585b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == VALUE_SEPARATOR) { 586b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 587b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ENTRY); 588b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 589b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 590b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Illegal token after key:value pair. 591b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Expected , or } after key:value pair."); 592b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 593b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 594b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseEntry(TokenType type) { 595b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == UNKNOWN) { 596b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected an object key or }."); 597b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 598b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 599b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Close the object and return. This allows for trailing commas. 
600b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == END_OBJECT) { 601b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->EndObject(); 602b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 603b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 604b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 605b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 606b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result; 607b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == BEGIN_STRING) { 608b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Key is a string (standard JSON), parse it and store the string. 609b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseStringHelper(); 610b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result.ok()) { 611b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_storage_.clear(); 612b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!parsed_storage_.empty()) { 613b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_storage_.swap(key_storage_); 614b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_ = StringPiece(key_storage_); 615b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 616b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_ = parsed_; 617b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 618b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer parsed_.clear(); 619b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 620b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else if (type == BEGIN_KEY) { 621b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Key is a bare key (back compat), create a StringPiece pointing to it. 
622b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ParseKey(); 623b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } else { 624b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Unknown key type, report an error. 625b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer result = ReportFailure("Expected an object key or }."); 626b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 627b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // On success we next expect an entry mid ':' then an object mid ',' or '}' 628b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result.ok()) { 629b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(OBJ_MID); 630b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ENTRY_MID); 631b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 632b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 633b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 634b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 635b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseEntryMid(TokenType type) { 636b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == UNKNOWN) { 637b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected : between key:value pair."); 638b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 639b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == ENTRY_SEPARATOR) { 640b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 641b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(VALUE); 642b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 643b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 644b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Expected : between key:value pair."); 
645b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 646b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 647b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::HandleBeginArray() { 648b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer GOOGLE_DCHECK_EQ('[', *p_.data()); 649b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 650b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->StartList(key_); 651b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 652b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ARRAY_VALUE); 653b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 654b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 655b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 656b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseArrayValue(TokenType type) { 657b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == UNKNOWN) { 658b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected a value or ] within an array."); 659b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 660b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 661b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == END_ARRAY) { 662b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->EndList(); 663b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 664b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 665b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 666b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 667b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // The ParseValue call may push something onto the stack so we need to make 668b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // sure an ARRAY_MID is after it, so we push it on now. 
669b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ARRAY_MID); 670b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer util::Status result = ParseValue(type); 671b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (result == util::Status::CANCELLED) { 672b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we were cancelled, pop back off the ARRAY_MID so we don't try to 673b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // push it on again when we try over. 674b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.pop(); 675b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 676b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return result; 677b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 678b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 679b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseArrayMid(TokenType type) { 680b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == UNKNOWN) { 681b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportUnknown("Expected , or ] after array value."); 682b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 683b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 684b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == END_ARRAY) { 685b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->EndList(); 686b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 687b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 688b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 689b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 690b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Found a comma, advance past it and expect an array value next. 
691b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (type == VALUE_SEPARATOR) { 692b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 693b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer stack_.push(ARRAY_VALUE); 694b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 695b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 696b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Illegal token after array value. 697b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Expected , or ] after array value."); 698b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 699b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 700b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseTrue() { 701b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderBool(key_, true); 702b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 703b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(true_len); 704b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 705b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 706b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 707b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseFalse() { 708b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderBool(key_, false); 709b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 710b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(false_len); 711b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 712b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 713b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 714b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseNull() { 
715b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer ow_->RenderNull(key_); 716b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_.clear(); 717b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(null_len); 718b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 719b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 720b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 721b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ReportFailure(StringPiece message) { 722b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer static const int kContextLength = 20; 723b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* p_start = p_.data(); 724b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* json_start = json_.data(); 725b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* begin = std::max(p_start - kContextLength, json_start); 726b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* end = 727b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer std::min(p_start + kContextLength, json_start + json_.size()); 728b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer StringPiece segment(begin, end - begin); 729b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer string location(p_start - begin, ' '); 730b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer location.push_back('^'); 731b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status(util::error::INVALID_ARGUMENT, 732b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer StrCat(message, "\n", segment, "\n", location)); 733b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 734b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 735b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ReportUnknown(StringPiece message) { 736b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas 
Berghammer // If we aren't finishing the parse, cancel parsing and try later. 737b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_) { 738b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 739b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 740b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (p_.empty()) { 741b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure(StrCat("Unexpected end of string. ", message)); 742b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 743b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure(message); 744b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 745b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 746b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammervoid JsonStreamParser::SkipWhitespace() { 747b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer while (!p_.empty() && ascii_isspace(*p_.data())) { 748b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer Advance(); 749b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 750b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 751b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 752b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammervoid JsonStreamParser::Advance() { 753b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Advance by moving one UTF8 character while making sure we don't go beyond 754b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // the length of StringPiece. 
755b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.remove_prefix(std::min<int>( 756b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length()))); 757b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 758b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 759b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammerutil::Status JsonStreamParser::ParseKey() { 760b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer StringPiece original = p_; 761b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!ConsumeKey(&p_, &key_)) { 762b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return ReportFailure("Invalid key or variable name."); 763b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 764b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we consumed everything but expect more data, reset p_ and cancel since 765b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // we can't know if the key was complete or not. 766b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (!finishing_ && p_.empty()) { 767b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer p_ = original; 768b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::CANCELLED; 769b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 770b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // Since we aren't using the key storage, clear it out. 
771b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer key_storage_.clear(); 772b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return util::Status::OK; 773b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 774b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 775b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas BerghammerJsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() { 776b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer SkipWhitespace(); 777b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 778b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer int size = p_.size(); 779b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (size == 0) { 780b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // If we ran out of data, report unknown and we'll place the previous parse 781b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // type onto the stack and try again when we have more data. 782b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return UNKNOWN; 783b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 784b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // TODO(sven): Split this method based on context since different contexts 785b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // support different tokens. Would slightly speed up processing? 
786b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer const char* data = p_.data(); 787b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '\"' || *data == '\'') return BEGIN_STRING; 788b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '-' || ('0' <= *data && *data <= '9')) { 789b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return BEGIN_NUMBER; 790b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 791b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (size >= true_len && !strncmp(data, "true", true_len)) { 792b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return BEGIN_TRUE; 793b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 794b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (size >= false_len && !strncmp(data, "false", false_len)) { 795b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return BEGIN_FALSE; 796b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 797b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (size >= null_len && !strncmp(data, "null", null_len)) { 798b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return BEGIN_NULL; 799b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 800b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '{') return BEGIN_OBJECT; 801b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '}') return END_OBJECT; 802b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == '[') return BEGIN_ARRAY; 803b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == ']') return END_ARRAY; 804b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == ':') return ENTRY_SEPARATOR; 805b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (*data == ',') return VALUE_SEPARATOR; 806b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer if (MatchKey(p_)) { 807b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return 
BEGIN_KEY; 808b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer } 809b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 810b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // We don't know that we necessarily have an invalid token here, just that we 811b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // can't parse what we have so far. So we don't report an error and just 812b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // return UNKNOWN so we can try again later when we have more data, or if we 813b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer // finish and we have leftovers. 814b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer return UNKNOWN; 815b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} 816b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer 817b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} // namespace converter 818b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} // namespace util 819b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} // namespace protobuf 820b0575e93e4c39dec69365b850088a1eb7f82c5b3Tamas Berghammer} // namespace google 821