// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31#include <google/protobuf/util/internal/json_stream_parser.h> 32 33#include <algorithm> 34#include <cctype> 35#include <cerrno> 36#include <cstdlib> 37#include <cstring> 38#include <memory> 39#ifndef _SHARED_PTR_H 40#include <google/protobuf/stubs/shared_ptr.h> 41#endif 42 43#include <google/protobuf/stubs/logging.h> 44#include <google/protobuf/stubs/common.h> 45#include <google/protobuf/util/internal/object_writer.h> 46#include <google/protobuf/util/internal/json_escaping.h> 47#include <google/protobuf/stubs/strutil.h> 48 49namespace google { 50namespace protobuf { 51namespace util { 52 53// Allow these symbols to be referenced as util::Status, util::error::* in 54// this file. 55using util::Status; 56namespace error { 57using util::error::INTERNAL; 58using util::error::INVALID_ARGUMENT; 59} // namespace error 60 61namespace converter { 62 63// Number of digits in an escaped UTF-16 code unit ('\\' 'u' X X X X) 64static const int kUnicodeEscapedLength = 6; 65 66// Length of the true, false, and null literals. 
67static const int true_len = strlen("true"); 68static const int false_len = strlen("false"); 69static const int null_len = strlen("null"); 70 71inline bool IsLetter(char c) { 72 return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_') || 73 (c == '$'); 74} 75 76inline bool IsAlphanumeric(char c) { 77 return IsLetter(c) || ('0' <= c && c <= '9'); 78} 79 80static bool ConsumeKey(StringPiece* input, StringPiece* key) { 81 if (input->empty() || !IsLetter((*input)[0])) return false; 82 int len = 1; 83 for (; len < input->size(); ++len) { 84 if (!IsAlphanumeric((*input)[len])) { 85 break; 86 } 87 } 88 *key = StringPiece(input->data(), len); 89 *input = StringPiece(input->data() + len, input->size() - len); 90 return true; 91} 92 93static bool MatchKey(StringPiece input) { 94 return !input.empty() && IsLetter(input[0]); 95} 96 97JsonStreamParser::JsonStreamParser(ObjectWriter* ow) 98 : ow_(ow), 99 stack_(), 100 leftover_(), 101 json_(), 102 p_(), 103 key_(), 104 key_storage_(), 105 finishing_(false), 106 parsed_(), 107 parsed_storage_(), 108 string_open_(0), 109 chunk_storage_(), 110 coerce_to_utf8_(false) { 111 // Initialize the stack with a single value to be parsed. 112 stack_.push(VALUE); 113} 114 115JsonStreamParser::~JsonStreamParser() {} 116 117 118util::Status JsonStreamParser::Parse(StringPiece json) { 119 StringPiece chunk = json; 120 // If we have leftovers from a previous chunk, append the new chunk to it 121 // and create a new StringPiece pointing at the string's data. This could 122 // be large but we rely on the chunks to be small, assuming they are 123 // fragments of a Cord. 124 if (!leftover_.empty()) { 125 // Don't point chunk to leftover_ because leftover_ will be updated in 126 // ParseChunk(chunk). 127 chunk_storage_.swap(leftover_); 128 json.AppendToString(&chunk_storage_); 129 chunk = StringPiece(chunk_storage_); 130 } 131 132 // Find the structurally valid UTF8 prefix and parse only that. 
133 int n = internal::UTF8SpnStructurallyValid(chunk); 134 if (n > 0) { 135 util::Status status = ParseChunk(chunk.substr(0, n)); 136 137 // Any leftover characters are stashed in leftover_ for later parsing when 138 // there is more data available. 139 chunk.substr(n).AppendToString(&leftover_); 140 return status; 141 } else { 142 chunk.CopyToString(&leftover_); 143 return util::Status::OK; 144 } 145} 146 147util::Status JsonStreamParser::FinishParse() { 148 // If we do not expect anything and there is nothing left to parse we're all 149 // done. 150 if (stack_.empty() && leftover_.empty()) { 151 return util::Status::OK; 152 } 153 154 // Storage for UTF8-coerced string. 155 google::protobuf::scoped_array<char> utf8; 156 if (coerce_to_utf8_) { 157 utf8.reset(new char[leftover_.size()]); 158 char* coerced = internal::UTF8CoerceToStructurallyValid(leftover_, utf8.get(), ' '); 159 p_ = json_ = StringPiece(coerced, leftover_.size()); 160 } else { 161 p_ = json_ = leftover_; 162 if (!internal::IsStructurallyValidUTF8(leftover_)) { 163 return ReportFailure("Encountered non UTF-8 code points."); 164 } 165 } 166 167 // Parse the remainder in finishing mode, which reports errors for things like 168 // unterminated strings or unknown tokens that would normally be retried. 169 finishing_ = true; 170 util::Status result = RunParser(); 171 if (result.ok()) { 172 SkipWhitespace(); 173 if (!p_.empty()) { 174 result = ReportFailure("Parsing terminated before end of input."); 175 } 176 } 177 return result; 178} 179 180util::Status JsonStreamParser::ParseChunk(StringPiece chunk) { 181 // Do not do any work if the chunk is empty. 182 if (chunk.empty()) return util::Status::OK; 183 184 p_ = json_ = chunk; 185 186 finishing_ = false; 187 util::Status result = RunParser(); 188 if (!result.ok()) return result; 189 190 SkipWhitespace(); 191 if (p_.empty()) { 192 // If we parsed everything we had, clear the leftover. 
193 leftover_.clear(); 194 } else { 195 // If we do not expect anything i.e. stack is empty, and we have non-empty 196 // string left to parse, we report an error. 197 if (stack_.empty()) { 198 return ReportFailure("Parsing terminated before end of input."); 199 } 200 // If we expect future data i.e. stack is non-empty, and we have some 201 // unparsed data left, we save it for later parse. 202 leftover_ = p_.ToString(); 203 } 204 return util::Status::OK; 205} 206 207util::Status JsonStreamParser::RunParser() { 208 while (!stack_.empty()) { 209 ParseType type = stack_.top(); 210 TokenType t = (string_open_ == 0) ? GetNextTokenType() : BEGIN_STRING; 211 stack_.pop(); 212 util::Status result; 213 switch (type) { 214 case VALUE: 215 result = ParseValue(t); 216 break; 217 218 case OBJ_MID: 219 result = ParseObjectMid(t); 220 break; 221 222 case ENTRY: 223 result = ParseEntry(t); 224 break; 225 226 case ENTRY_MID: 227 result = ParseEntryMid(t); 228 break; 229 230 case ARRAY_VALUE: 231 result = ParseArrayValue(t); 232 break; 233 234 case ARRAY_MID: 235 result = ParseArrayMid(t); 236 break; 237 238 default: 239 result = util::Status(util::error::INTERNAL, 240 StrCat("Unknown parse type: ", type)); 241 break; 242 } 243 if (!result.ok()) { 244 // If we were cancelled, save our state and try again later. 245 if (!finishing_ && result == util::Status::CANCELLED) { 246 stack_.push(type); 247 // If we have a key we still need to render, make sure to save off the 248 // contents in our own storage. 
249 if (!key_.empty() && key_storage_.empty()) { 250 key_.AppendToString(&key_storage_); 251 key_ = StringPiece(key_storage_); 252 } 253 result = util::Status::OK; 254 } 255 return result; 256 } 257 } 258 return util::Status::OK; 259} 260 261util::Status JsonStreamParser::ParseValue(TokenType type) { 262 switch (type) { 263 case BEGIN_OBJECT: 264 return HandleBeginObject(); 265 case BEGIN_ARRAY: 266 return HandleBeginArray(); 267 case BEGIN_STRING: 268 return ParseString(); 269 case BEGIN_NUMBER: 270 return ParseNumber(); 271 case BEGIN_TRUE: 272 return ParseTrue(); 273 case BEGIN_FALSE: 274 return ParseFalse(); 275 case BEGIN_NULL: 276 return ParseNull(); 277 case UNKNOWN: 278 return ReportUnknown("Expected a value."); 279 default: { 280 // Special case for having been cut off while parsing, wait for more data. 281 // This handles things like 'fals' being at the end of the string, we 282 // don't know if the next char would be e, completing it, or something 283 // else, making it invalid. 284 if (!finishing_ && p_.length() < false_len) { 285 return util::Status::CANCELLED; 286 } 287 return ReportFailure("Unexpected token."); 288 } 289 } 290} 291 292util::Status JsonStreamParser::ParseString() { 293 util::Status result = ParseStringHelper(); 294 if (result.ok()) { 295 ow_->RenderString(key_, parsed_); 296 key_.clear(); 297 parsed_.clear(); 298 parsed_storage_.clear(); 299 } 300 return result; 301} 302 303util::Status JsonStreamParser::ParseStringHelper() { 304 // If we haven't seen the start quote, grab it and remember it for later. 305 if (string_open_ == 0) { 306 string_open_ = *p_.data(); 307 GOOGLE_DCHECK(string_open_ == '\"' || string_open_ == '\''); 308 Advance(); 309 } 310 // Track where we last copied data from so we can minimize copying. 311 const char* last = p_.data(); 312 while (!p_.empty()) { 313 const char* data = p_.data(); 314 if (*data == '\\') { 315 // We're about to handle an escape, copy all bytes from last to data. 
316 if (last < data) { 317 parsed_storage_.append(last, data - last); 318 last = data; 319 } 320 // If we ran out of string after the \, cancel or report an error 321 // depending on if we expect more data later. 322 if (p_.length() == 1) { 323 if (!finishing_) { 324 return util::Status::CANCELLED; 325 } 326 return ReportFailure("Closing quote expected in string."); 327 } 328 // Parse a unicode escape if we found \u in the string. 329 if (data[1] == 'u') { 330 util::Status result = ParseUnicodeEscape(); 331 if (!result.ok()) { 332 return result; 333 } 334 // Move last pointer past the unicode escape and continue. 335 last = p_.data(); 336 continue; 337 } 338 // Handle the standard set of backslash-escaped characters. 339 switch (data[1]) { 340 case 'b': 341 parsed_storage_.push_back('\b'); 342 break; 343 case 'f': 344 parsed_storage_.push_back('\f'); 345 break; 346 case 'n': 347 parsed_storage_.push_back('\n'); 348 break; 349 case 'r': 350 parsed_storage_.push_back('\r'); 351 break; 352 case 't': 353 parsed_storage_.push_back('\t'); 354 break; 355 case 'v': 356 parsed_storage_.push_back('\v'); 357 break; 358 default: 359 parsed_storage_.push_back(data[1]); 360 } 361 // We handled two characters, so advance past them and continue. 362 p_.remove_prefix(2); 363 last = p_.data(); 364 continue; 365 } 366 // If we found the closing quote note it, advance past it, and return. 367 if (*data == string_open_) { 368 // If we didn't copy anything, reuse the input buffer. 369 if (parsed_storage_.empty()) { 370 parsed_ = StringPiece(last, data - last); 371 } else { 372 if (last < data) { 373 parsed_storage_.append(last, data - last); 374 last = data; 375 } 376 parsed_ = StringPiece(parsed_storage_); 377 } 378 // Clear the quote char so next time we try to parse a string we'll 379 // start fresh. 380 string_open_ = 0; 381 Advance(); 382 return util::Status::OK; 383 } 384 // Normal character, just advance past it. 
385 Advance(); 386 } 387 // If we ran out of characters, copy over what we have so far. 388 if (last < p_.data()) { 389 parsed_storage_.append(last, p_.data() - last); 390 } 391 // If we didn't find the closing quote but we expect more data, cancel for now 392 if (!finishing_) { 393 return util::Status::CANCELLED; 394 } 395 // End of string reached without a closing quote, report an error. 396 string_open_ = 0; 397 return ReportFailure("Closing quote expected in string."); 398} 399 400// Converts a unicode escaped character to a decimal value stored in a char32 401// for use in UTF8 encoding utility. We assume that str begins with \uhhhh and 402// convert that from the hex number to a decimal value. 403// 404// There are some security exploits with UTF-8 that we should be careful of: 405// - http://www.unicode.org/reports/tr36/#UTF-8_Exploit 406// - http://sites/intl-eng/design-guide/core-application 407util::Status JsonStreamParser::ParseUnicodeEscape() { 408 if (p_.length() < kUnicodeEscapedLength) { 409 if (!finishing_) { 410 return util::Status::CANCELLED; 411 } 412 return ReportFailure("Illegal hex string."); 413 } 414 GOOGLE_DCHECK_EQ('\\', p_.data()[0]); 415 GOOGLE_DCHECK_EQ('u', p_.data()[1]); 416 uint32 code = 0; 417 for (int i = 2; i < kUnicodeEscapedLength; ++i) { 418 if (!isxdigit(p_.data()[i])) { 419 return ReportFailure("Invalid escape sequence."); 420 } 421 code = (code << 4) + hex_digit_to_int(p_.data()[i]); 422 } 423 if (code >= JsonEscaping::kMinHighSurrogate && 424 code <= JsonEscaping::kMaxHighSurrogate) { 425 if (p_.length() < 2 * kUnicodeEscapedLength) { 426 if (!finishing_) { 427 return util::Status::CANCELLED; 428 } 429 if (!coerce_to_utf8_) { 430 return ReportFailure("Missing low surrogate."); 431 } 432 } else if (p_.data()[kUnicodeEscapedLength] == '\\' && 433 p_.data()[kUnicodeEscapedLength + 1] == 'u') { 434 uint32 low_code = 0; 435 for (int i = kUnicodeEscapedLength + 2; i < 2 * kUnicodeEscapedLength; 436 ++i) { 437 if 
(!isxdigit(p_.data()[i])) { 438 return ReportFailure("Invalid escape sequence."); 439 } 440 low_code = (low_code << 4) + hex_digit_to_int(p_.data()[i]); 441 } 442 if (low_code >= JsonEscaping::kMinLowSurrogate && 443 low_code <= JsonEscaping::kMaxLowSurrogate) { 444 // Convert UTF-16 surrogate pair to 21-bit Unicode codepoint. 445 code = (((code & 0x3FF) << 10) | (low_code & 0x3FF)) + 446 JsonEscaping::kMinSupplementaryCodePoint; 447 // Advance past the first code unit escape. 448 p_.remove_prefix(kUnicodeEscapedLength); 449 } else if (!coerce_to_utf8_) { 450 return ReportFailure("Invalid low surrogate."); 451 } 452 } else if (!coerce_to_utf8_) { 453 return ReportFailure("Missing low surrogate."); 454 } 455 } 456 if (!coerce_to_utf8_ && !IsValidCodePoint(code)) { 457 return ReportFailure("Invalid unicode code point."); 458 } 459 char buf[UTFmax]; 460 int len = EncodeAsUTF8Char(code, buf); 461 // Advance past the [final] code unit escape. 462 p_.remove_prefix(kUnicodeEscapedLength); 463 parsed_storage_.append(buf, len); 464 return util::Status::OK; 465} 466 467util::Status JsonStreamParser::ParseNumber() { 468 NumberResult number; 469 util::Status result = ParseNumberHelper(&number); 470 if (result.ok()) { 471 switch (number.type) { 472 case NumberResult::DOUBLE: 473 ow_->RenderDouble(key_, number.double_val); 474 key_.clear(); 475 break; 476 477 case NumberResult::INT: 478 ow_->RenderInt64(key_, number.int_val); 479 key_.clear(); 480 break; 481 482 case NumberResult::UINT: 483 ow_->RenderUint64(key_, number.uint_val); 484 key_.clear(); 485 break; 486 487 default: 488 return ReportFailure("Unable to parse number."); 489 } 490 } 491 return result; 492} 493 494util::Status JsonStreamParser::ParseNumberHelper(NumberResult* result) { 495 const char* data = p_.data(); 496 int length = p_.length(); 497 498 // Look for the first non-numeric character, or the end of the string. 
499 int index = 0; 500 bool floating = false; 501 bool negative = data[index] == '-'; 502 // Find the first character that cannot be part of the number. Along the way 503 // detect if the number needs to be parsed as a double. 504 // Note that this restricts numbers to the JSON specification, so for example 505 // we do not support hex or octal notations. 506 for (; index < length; ++index) { 507 char c = data[index]; 508 if (isdigit(c)) continue; 509 if (c == '.' || c == 'e' || c == 'E') { 510 floating = true; 511 continue; 512 } 513 if (c == '+' || c == '-' || c == 'x') continue; 514 // Not a valid number character, break out. 515 break; 516 } 517 518 // If the entire input is a valid number, and we may have more content in the 519 // future, we abort for now and resume when we know more. 520 if (index == length && !finishing_) { 521 return util::Status::CANCELLED; 522 } 523 524 // Create a string containing just the number, so we can use safe_strtoX 525 string number = p_.substr(0, index).ToString(); 526 527 // Floating point number, parse as a double. 528 if (floating) { 529 if (!safe_strtod(number, &result->double_val)) { 530 return ReportFailure("Unable to parse number."); 531 } 532 result->type = NumberResult::DOUBLE; 533 p_.remove_prefix(index); 534 return util::Status::OK; 535 } 536 537 // Positive non-floating point number, parse as a uint64. 538 if (!negative) { 539 // Octal/Hex numbers are not valid JSON values. 540 if (number.length() >= 2 && number[0] == '0') { 541 return ReportFailure("Octal/hex numbers are not valid JSON values."); 542 } 543 if (!safe_strtou64(number, &result->uint_val)) { 544 return ReportFailure("Unable to parse number."); 545 } 546 result->type = NumberResult::UINT; 547 p_.remove_prefix(index); 548 return util::Status::OK; 549 } 550 551 // Octal/Hex numbers are not valid JSON values. 
552 if (number.length() >= 3 && number[1] == '0') { 553 return ReportFailure("Octal/hex numbers are not valid JSON values."); 554 } 555 // Negative non-floating point number, parse as an int64. 556 if (!safe_strto64(number, &result->int_val)) { 557 return ReportFailure("Unable to parse number."); 558 } 559 result->type = NumberResult::INT; 560 p_.remove_prefix(index); 561 return util::Status::OK; 562} 563 564util::Status JsonStreamParser::HandleBeginObject() { 565 GOOGLE_DCHECK_EQ('{', *p_.data()); 566 Advance(); 567 ow_->StartObject(key_); 568 key_.clear(); 569 stack_.push(ENTRY); 570 return util::Status::OK; 571} 572 573util::Status JsonStreamParser::ParseObjectMid(TokenType type) { 574 if (type == UNKNOWN) { 575 return ReportUnknown("Expected , or } after key:value pair."); 576 } 577 578 // Object is complete, advance past the comma and render the EndObject. 579 if (type == END_OBJECT) { 580 Advance(); 581 ow_->EndObject(); 582 return util::Status::OK; 583 } 584 // Found a comma, advance past it and get ready for an entry. 585 if (type == VALUE_SEPARATOR) { 586 Advance(); 587 stack_.push(ENTRY); 588 return util::Status::OK; 589 } 590 // Illegal token after key:value pair. 591 return ReportFailure("Expected , or } after key:value pair."); 592} 593 594util::Status JsonStreamParser::ParseEntry(TokenType type) { 595 if (type == UNKNOWN) { 596 return ReportUnknown("Expected an object key or }."); 597 } 598 599 // Close the object and return. This allows for trailing commas. 600 if (type == END_OBJECT) { 601 ow_->EndObject(); 602 Advance(); 603 return util::Status::OK; 604 } 605 606 util::Status result; 607 if (type == BEGIN_STRING) { 608 // Key is a string (standard JSON), parse it and store the string. 
609 result = ParseStringHelper(); 610 if (result.ok()) { 611 key_storage_.clear(); 612 if (!parsed_storage_.empty()) { 613 parsed_storage_.swap(key_storage_); 614 key_ = StringPiece(key_storage_); 615 } else { 616 key_ = parsed_; 617 } 618 parsed_.clear(); 619 } 620 } else if (type == BEGIN_KEY) { 621 // Key is a bare key (back compat), create a StringPiece pointing to it. 622 result = ParseKey(); 623 } else { 624 // Unknown key type, report an error. 625 result = ReportFailure("Expected an object key or }."); 626 } 627 // On success we next expect an entry mid ':' then an object mid ',' or '}' 628 if (result.ok()) { 629 stack_.push(OBJ_MID); 630 stack_.push(ENTRY_MID); 631 } 632 return result; 633} 634 635util::Status JsonStreamParser::ParseEntryMid(TokenType type) { 636 if (type == UNKNOWN) { 637 return ReportUnknown("Expected : between key:value pair."); 638 } 639 if (type == ENTRY_SEPARATOR) { 640 Advance(); 641 stack_.push(VALUE); 642 return util::Status::OK; 643 } 644 return ReportFailure("Expected : between key:value pair."); 645} 646 647util::Status JsonStreamParser::HandleBeginArray() { 648 GOOGLE_DCHECK_EQ('[', *p_.data()); 649 Advance(); 650 ow_->StartList(key_); 651 key_.clear(); 652 stack_.push(ARRAY_VALUE); 653 return util::Status::OK; 654} 655 656util::Status JsonStreamParser::ParseArrayValue(TokenType type) { 657 if (type == UNKNOWN) { 658 return ReportUnknown("Expected a value or ] within an array."); 659 } 660 661 if (type == END_ARRAY) { 662 ow_->EndList(); 663 Advance(); 664 return util::Status::OK; 665 } 666 667 // The ParseValue call may push something onto the stack so we need to make 668 // sure an ARRAY_MID is after it, so we push it on now. 669 stack_.push(ARRAY_MID); 670 util::Status result = ParseValue(type); 671 if (result == util::Status::CANCELLED) { 672 // If we were cancelled, pop back off the ARRAY_MID so we don't try to 673 // push it on again when we try over. 
674 stack_.pop(); 675 } 676 return result; 677} 678 679util::Status JsonStreamParser::ParseArrayMid(TokenType type) { 680 if (type == UNKNOWN) { 681 return ReportUnknown("Expected , or ] after array value."); 682 } 683 684 if (type == END_ARRAY) { 685 ow_->EndList(); 686 Advance(); 687 return util::Status::OK; 688 } 689 690 // Found a comma, advance past it and expect an array value next. 691 if (type == VALUE_SEPARATOR) { 692 Advance(); 693 stack_.push(ARRAY_VALUE); 694 return util::Status::OK; 695 } 696 // Illegal token after array value. 697 return ReportFailure("Expected , or ] after array value."); 698} 699 700util::Status JsonStreamParser::ParseTrue() { 701 ow_->RenderBool(key_, true); 702 key_.clear(); 703 p_.remove_prefix(true_len); 704 return util::Status::OK; 705} 706 707util::Status JsonStreamParser::ParseFalse() { 708 ow_->RenderBool(key_, false); 709 key_.clear(); 710 p_.remove_prefix(false_len); 711 return util::Status::OK; 712} 713 714util::Status JsonStreamParser::ParseNull() { 715 ow_->RenderNull(key_); 716 key_.clear(); 717 p_.remove_prefix(null_len); 718 return util::Status::OK; 719} 720 721util::Status JsonStreamParser::ReportFailure(StringPiece message) { 722 static const int kContextLength = 20; 723 const char* p_start = p_.data(); 724 const char* json_start = json_.data(); 725 const char* begin = std::max(p_start - kContextLength, json_start); 726 const char* end = 727 std::min(p_start + kContextLength, json_start + json_.size()); 728 StringPiece segment(begin, end - begin); 729 string location(p_start - begin, ' '); 730 location.push_back('^'); 731 return util::Status(util::error::INVALID_ARGUMENT, 732 StrCat(message, "\n", segment, "\n", location)); 733} 734 735util::Status JsonStreamParser::ReportUnknown(StringPiece message) { 736 // If we aren't finishing the parse, cancel parsing and try later. 737 if (!finishing_) { 738 return util::Status::CANCELLED; 739 } 740 if (p_.empty()) { 741 return ReportFailure(StrCat("Unexpected end of string. 
", message)); 742 } 743 return ReportFailure(message); 744} 745 746void JsonStreamParser::SkipWhitespace() { 747 while (!p_.empty() && ascii_isspace(*p_.data())) { 748 Advance(); 749 } 750} 751 752void JsonStreamParser::Advance() { 753 // Advance by moving one UTF8 character while making sure we don't go beyond 754 // the length of StringPiece. 755 p_.remove_prefix(std::min<int>( 756 p_.length(), UTF8FirstLetterNumBytes(p_.data(), p_.length()))); 757} 758 759util::Status JsonStreamParser::ParseKey() { 760 StringPiece original = p_; 761 if (!ConsumeKey(&p_, &key_)) { 762 return ReportFailure("Invalid key or variable name."); 763 } 764 // If we consumed everything but expect more data, reset p_ and cancel since 765 // we can't know if the key was complete or not. 766 if (!finishing_ && p_.empty()) { 767 p_ = original; 768 return util::Status::CANCELLED; 769 } 770 // Since we aren't using the key storage, clear it out. 771 key_storage_.clear(); 772 return util::Status::OK; 773} 774 775JsonStreamParser::TokenType JsonStreamParser::GetNextTokenType() { 776 SkipWhitespace(); 777 778 int size = p_.size(); 779 if (size == 0) { 780 // If we ran out of data, report unknown and we'll place the previous parse 781 // type onto the stack and try again when we have more data. 782 return UNKNOWN; 783 } 784 // TODO(sven): Split this method based on context since different contexts 785 // support different tokens. Would slightly speed up processing? 
786 const char* data = p_.data(); 787 if (*data == '\"' || *data == '\'') return BEGIN_STRING; 788 if (*data == '-' || ('0' <= *data && *data <= '9')) { 789 return BEGIN_NUMBER; 790 } 791 if (size >= true_len && !strncmp(data, "true", true_len)) { 792 return BEGIN_TRUE; 793 } 794 if (size >= false_len && !strncmp(data, "false", false_len)) { 795 return BEGIN_FALSE; 796 } 797 if (size >= null_len && !strncmp(data, "null", null_len)) { 798 return BEGIN_NULL; 799 } 800 if (*data == '{') return BEGIN_OBJECT; 801 if (*data == '}') return END_OBJECT; 802 if (*data == '[') return BEGIN_ARRAY; 803 if (*data == ']') return END_ARRAY; 804 if (*data == ':') return ENTRY_SEPARATOR; 805 if (*data == ',') return VALUE_SEPARATOR; 806 if (MatchKey(p_)) { 807 return BEGIN_KEY; 808 } 809 810 // We don't know that we necessarily have an invalid token here, just that we 811 // can't parse what we have so far. So we don't report an error and just 812 // return UNKNOWN so we can try again later when we have more data, or if we 813 // finish and we have leftovers. 814 return UNKNOWN; 815} 816 817} // namespace converter 818} // namespace util 819} // namespace protobuf 820} // namespace google 821