1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_JSON_PARSER_H_
6#define V8_JSON_PARSER_H_
7
8#include "src/v8.h"
9
10#include "src/char-predicates-inl.h"
11#include "src/conversions.h"
12#include "src/heap/spaces-inl.h"
13#include "src/messages.h"
14#include "src/token.h"
15
16namespace v8 {
17namespace internal {
18
19// A simple json parser.
20template <bool seq_one_byte>
21class JsonParser BASE_EMBEDDED {
22 public:
23  MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) {
24    return JsonParser(source).ParseJson();
25  }
26
27  static const int kEndOfString = -1;
28
29 private:
30  explicit JsonParser(Handle<String> source)
31      : source_(source),
32        source_length_(source->length()),
33        isolate_(source->map()->GetHeap()->isolate()),
34        factory_(isolate_->factory()),
35        zone_(isolate_),
36        object_constructor_(isolate_->native_context()->object_function(),
37                            isolate_),
38        position_(-1) {
39    source_ = String::Flatten(source_);
40    pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED;
41
42    // Optimized fast case where we only have Latin1 characters.
43    if (seq_one_byte) {
44      seq_source_ = Handle<SeqOneByteString>::cast(source_);
45    }
46  }
47
48  // Parse a string containing a single JSON value.
49  MaybeHandle<Object> ParseJson();
50
51  inline void Advance() {
52    position_++;
53    if (position_ >= source_length_) {
54      c0_ = kEndOfString;
55    } else if (seq_one_byte) {
56      c0_ = seq_source_->SeqOneByteStringGet(position_);
57    } else {
58      c0_ = source_->Get(position_);
59    }
60  }
61
62  // The JSON lexical grammar is specified in the ECMAScript 5 standard,
63  // section 15.12.1.1. The only allowed whitespace characters between tokens
64  // are tab, carriage-return, newline and space.
65
66  inline void AdvanceSkipWhitespace() {
67    do {
68      Advance();
69    } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r');
70  }
71
72  inline void SkipWhitespace() {
73    while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') {
74      Advance();
75    }
76  }
77
78  inline uc32 AdvanceGetChar() {
79    Advance();
80    return c0_;
81  }
82
83  // Checks that current charater is c.
84  // If so, then consume c and skip whitespace.
85  inline bool MatchSkipWhiteSpace(uc32 c) {
86    if (c0_ == c) {
87      AdvanceSkipWhitespace();
88      return true;
89    }
90    return false;
91  }
92
93  // A JSON string (production JSONString) is subset of valid JavaScript string
94  // literals. The string must only be double-quoted (not single-quoted), and
95  // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
96  // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
97  Handle<String> ParseJsonString() {
98    return ScanJsonString<false>();
99  }
100
101  bool ParseJsonString(Handle<String> expected) {
102    int length = expected->length();
103    if (source_->length() - position_ - 1 > length) {
104      DisallowHeapAllocation no_gc;
105      String::FlatContent content = expected->GetFlatContent();
106      if (content.IsOneByte()) {
107        DCHECK_EQ('"', c0_);
108        const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1;
109        const uint8_t* expected_chars = content.ToOneByteVector().start();
110        for (int i = 0; i < length; i++) {
111          uint8_t c0 = input_chars[i];
112          if (c0 != expected_chars[i] ||
113              c0 == '"' || c0 < 0x20 || c0 == '\\') {
114            return false;
115          }
116        }
117        if (input_chars[length] == '"') {
118          position_ = position_ + length + 1;
119          AdvanceSkipWhitespace();
120          return true;
121        }
122      }
123    }
124    return false;
125  }
126
127  Handle<String> ParseJsonInternalizedString() {
128    return ScanJsonString<true>();
129  }
130
131  template <bool is_internalized>
132  Handle<String> ScanJsonString();
133  // Creates a new string and copies prefix[start..end] into the beginning
134  // of it. Then scans the rest of the string, adding characters after the
135  // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char.
136  template <typename StringType, typename SinkChar>
137  Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
138
139  // A JSON number (production JSONNumber) is a subset of the valid JavaScript
140  // decimal number literals.
141  // It includes an optional minus sign, must have at least one
142  // digit before and after a decimal point, may not have prefixed zeros (unless
143  // the integer part is zero), and may include an exponent part (e.g., "e-10").
144  // Hexadecimal and octal numbers are not allowed.
145  Handle<Object> ParseJsonNumber();
146
147  // Parse a single JSON value from input (grammar production JSONValue).
148  // A JSON value is either a (double-quoted) string literal, a number literal,
149  // one of "true", "false", or "null", or an object or array literal.
150  Handle<Object> ParseJsonValue();
151
152  // Parse a JSON object literal (grammar production JSONObject).
153  // An object literal is a squiggly-braced and comma separated sequence
154  // (possibly empty) of key/value pairs, where the key is a JSON string
155  // literal, the value is a JSON value, and the two are separated by a colon.
156  // A JSON array doesn't allow numbers and identifiers as keys, like a
157  // JavaScript array.
158  Handle<Object> ParseJsonObject();
159
160  // Parses a JSON array literal (grammar production JSONArray). An array
161  // literal is a square-bracketed and comma separated sequence (possibly empty)
162  // of JSON values.
163  // A JSON array doesn't allow leaving out values from the sequence, nor does
164  // it allow a terminal comma, like a JavaScript array does.
165  Handle<Object> ParseJsonArray();
166
167
168  // Mark that a parsing error has happened at the current token, and
169  // return a null handle. Primarily for readability.
170  inline Handle<Object> ReportUnexpectedCharacter() {
171    return Handle<Object>::null();
172  }
173
174  inline Isolate* isolate() { return isolate_; }
175  inline Factory* factory() { return factory_; }
176  inline Handle<JSFunction> object_constructor() { return object_constructor_; }
177
178  static const int kInitialSpecialStringLength = 1024;
179  static const int kPretenureTreshold = 100 * 1024;
180
181
182 private:
183  Zone* zone() { return &zone_; }
184
185  void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map,
186                               ZoneList<Handle<Object> >* properties);
187
188  Handle<String> source_;
189  int source_length_;
190  Handle<SeqOneByteString> seq_source_;
191
192  PretenureFlag pretenure_;
193  Isolate* isolate_;
194  Factory* factory_;
195  Zone zone_;
196  Handle<JSFunction> object_constructor_;
197  uc32 c0_;
198  int position_;
199};
200
201template <bool seq_one_byte>
202MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() {
203  // Advance to the first character (possibly EOS)
204  AdvanceSkipWhitespace();
205  Handle<Object> result = ParseJsonValue();
206  if (result.is_null() || c0_ != kEndOfString) {
207    // Some exception (for example stack overflow) is already pending.
208    if (isolate_->has_pending_exception()) return Handle<Object>::null();
209
210    // Parse failed. Current character is the unexpected token.
211    const char* message;
212    Factory* factory = this->factory();
213    Handle<JSArray> array;
214
215    switch (c0_) {
216      case kEndOfString:
217        message = "unexpected_eos";
218        array = factory->NewJSArray(0);
219        break;
220      case '-':
221      case '0':
222      case '1':
223      case '2':
224      case '3':
225      case '4':
226      case '5':
227      case '6':
228      case '7':
229      case '8':
230      case '9':
231        message = "unexpected_token_number";
232        array = factory->NewJSArray(0);
233        break;
234      case '"':
235        message = "unexpected_token_string";
236        array = factory->NewJSArray(0);
237        break;
238      default:
239        message = "unexpected_token";
240        Handle<Object> name = factory->LookupSingleCharacterStringFromCode(c0_);
241        Handle<FixedArray> element = factory->NewFixedArray(1);
242        element->set(0, *name);
243        array = factory->NewJSArrayWithElements(element);
244        break;
245    }
246
247    MessageLocation location(factory->NewScript(source_),
248                             position_,
249                             position_ + 1);
250    Handle<Object> error;
251    ASSIGN_RETURN_ON_EXCEPTION(isolate(), error,
252                               factory->NewSyntaxError(message, array), Object);
253    return isolate()->template Throw<Object>(error, &location);
254  }
255  return result;
256}
257
258
259// Parse any JSON value.
260template <bool seq_one_byte>
261Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() {
262  StackLimitCheck stack_check(isolate_);
263  if (stack_check.HasOverflowed()) {
264    isolate_->StackOverflow();
265    return Handle<Object>::null();
266  }
267
268  if (c0_ == '"') return ParseJsonString();
269  if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber();
270  if (c0_ == '{') return ParseJsonObject();
271  if (c0_ == '[') return ParseJsonArray();
272  if (c0_ == 'f') {
273    if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
274        AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
275      AdvanceSkipWhitespace();
276      return factory()->false_value();
277    }
278    return ReportUnexpectedCharacter();
279  }
280  if (c0_ == 't') {
281    if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
282        AdvanceGetChar() == 'e') {
283      AdvanceSkipWhitespace();
284      return factory()->true_value();
285    }
286    return ReportUnexpectedCharacter();
287  }
288  if (c0_ == 'n') {
289    if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
290        AdvanceGetChar() == 'l') {
291      AdvanceSkipWhitespace();
292      return factory()->null_value();
293    }
294    return ReportUnexpectedCharacter();
295  }
296  return ReportUnexpectedCharacter();
297}
298
299
300// Parse a JSON object. Position must be right at '{'.
301template <bool seq_one_byte>
302Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() {
303  HandleScope scope(isolate());
304  Handle<JSObject> json_object =
305      factory()->NewJSObject(object_constructor(), pretenure_);
306  Handle<Map> map(json_object->map());
307  ZoneList<Handle<Object> > properties(8, zone());
308  DCHECK_EQ(c0_, '{');
309
310  bool transitioning = true;
311
312  AdvanceSkipWhitespace();
313  if (c0_ != '}') {
314    do {
315      if (c0_ != '"') return ReportUnexpectedCharacter();
316
317      int start_position = position_;
318      Advance();
319
320      uint32_t index = 0;
321      if (c0_ >= '0' && c0_ <= '9') {
322        // Maybe an array index, try to parse it.
323        if (c0_ == '0') {
324          // With a leading zero, the string has to be "0" only to be an index.
325          Advance();
326        } else {
327          do {
328            int d = c0_ - '0';
329            if (index > 429496729U - ((d > 5) ? 1 : 0)) break;
330            index = (index * 10) + d;
331            Advance();
332          } while (c0_ >= '0' && c0_ <= '9');
333        }
334
335        if (c0_ == '"') {
336          // Successfully parsed index, parse and store element.
337          AdvanceSkipWhitespace();
338
339          if (c0_ != ':') return ReportUnexpectedCharacter();
340          AdvanceSkipWhitespace();
341          Handle<Object> value = ParseJsonValue();
342          if (value.is_null()) return ReportUnexpectedCharacter();
343
344          JSObject::SetOwnElement(json_object, index, value, SLOPPY).Assert();
345          continue;
346        }
347        // Not an index, fallback to the slow path.
348      }
349
350      position_ = start_position;
351#ifdef DEBUG
352      c0_ = '"';
353#endif
354
355      Handle<String> key;
356      Handle<Object> value;
357
358      // Try to follow existing transitions as long as possible. Once we stop
359      // transitioning, no transition can be found anymore.
360      if (transitioning) {
361        // First check whether there is a single expected transition. If so, try
362        // to parse it first.
363        bool follow_expected = false;
364        Handle<Map> target;
365        if (seq_one_byte) {
366          key = Map::ExpectedTransitionKey(map);
367          follow_expected = !key.is_null() && ParseJsonString(key);
368        }
369        // If the expected transition hits, follow it.
370        if (follow_expected) {
371          target = Map::ExpectedTransitionTarget(map);
372        } else {
373          // If the expected transition failed, parse an internalized string and
374          // try to find a matching transition.
375          key = ParseJsonInternalizedString();
376          if (key.is_null()) return ReportUnexpectedCharacter();
377
378          target = Map::FindTransitionToField(map, key);
379          // If a transition was found, follow it and continue.
380          transitioning = !target.is_null();
381        }
382        if (c0_ != ':') return ReportUnexpectedCharacter();
383
384        AdvanceSkipWhitespace();
385        value = ParseJsonValue();
386        if (value.is_null()) return ReportUnexpectedCharacter();
387
388        if (transitioning) {
389          int descriptor = map->NumberOfOwnDescriptors();
390          PropertyDetails details =
391              target->instance_descriptors()->GetDetails(descriptor);
392          Representation expected_representation = details.representation();
393
394          if (value->FitsRepresentation(expected_representation)) {
395            if (expected_representation.IsDouble()) {
396              value = Object::NewStorageFor(isolate(), value,
397                                            expected_representation);
398            } else if (expected_representation.IsHeapObject() &&
399                       !target->instance_descriptors()->GetFieldType(
400                           descriptor)->NowContains(value)) {
401              Handle<HeapType> value_type(value->OptimalType(
402                      isolate(), expected_representation));
403              Map::GeneralizeFieldType(target, descriptor, value_type);
404            }
405            DCHECK(target->instance_descriptors()->GetFieldType(
406                    descriptor)->NowContains(value));
407            properties.Add(value, zone());
408            map = target;
409            continue;
410          } else {
411            transitioning = false;
412          }
413        }
414
415        // Commit the intermediate state to the object and stop transitioning.
416        CommitStateToJsonObject(json_object, map, &properties);
417      } else {
418        key = ParseJsonInternalizedString();
419        if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
420
421        AdvanceSkipWhitespace();
422        value = ParseJsonValue();
423        if (value.is_null()) return ReportUnexpectedCharacter();
424      }
425
426      Runtime::DefineObjectProperty(json_object, key, value, NONE).Check();
427    } while (MatchSkipWhiteSpace(','));
428    if (c0_ != '}') {
429      return ReportUnexpectedCharacter();
430    }
431
432    // If we transitioned until the very end, transition the map now.
433    if (transitioning) {
434      CommitStateToJsonObject(json_object, map, &properties);
435    }
436  }
437  AdvanceSkipWhitespace();
438  return scope.CloseAndEscape(json_object);
439}
440
441
442template <bool seq_one_byte>
443void JsonParser<seq_one_byte>::CommitStateToJsonObject(
444    Handle<JSObject> json_object, Handle<Map> map,
445    ZoneList<Handle<Object> >* properties) {
446  JSObject::AllocateStorageForMap(json_object, map);
447  DCHECK(!json_object->map()->is_dictionary_map());
448
449  DisallowHeapAllocation no_gc;
450  Factory* factory = isolate()->factory();
451  // If the |json_object|'s map is exactly the same as |map| then the
452  // |properties| values correspond to the |map| and nothing more has to be
453  // done. But if the |json_object|'s map is different then we have to
454  // iterate descriptors to ensure that properties still correspond to the
455  // map.
456  bool slow_case = json_object->map() != *map;
457  DescriptorArray* descriptors = NULL;
458
459  int length = properties->length();
460  if (slow_case) {
461    descriptors = json_object->map()->instance_descriptors();
462    DCHECK(json_object->map()->NumberOfOwnDescriptors() == length);
463  }
464  for (int i = 0; i < length; i++) {
465    Handle<Object> value = (*properties)[i];
466    if (slow_case && value->IsMutableHeapNumber() &&
467        !descriptors->GetDetails(i).representation().IsDouble()) {
468      // Turn mutable heap numbers into immutable if the field representation
469      // is not double.
470      HeapNumber::cast(*value)->set_map(*factory->heap_number_map());
471    }
472    FieldIndex index = FieldIndex::ForPropertyIndex(*map, i);
473    json_object->FastPropertyAtPut(index, *value);
474  }
475}
476
477
478// Parse a JSON array. Position must be right at '['.
479template <bool seq_one_byte>
480Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() {
481  HandleScope scope(isolate());
482  ZoneList<Handle<Object> > elements(4, zone());
483  DCHECK_EQ(c0_, '[');
484
485  AdvanceSkipWhitespace();
486  if (c0_ != ']') {
487    do {
488      Handle<Object> element = ParseJsonValue();
489      if (element.is_null()) return ReportUnexpectedCharacter();
490      elements.Add(element, zone());
491    } while (MatchSkipWhiteSpace(','));
492    if (c0_ != ']') {
493      return ReportUnexpectedCharacter();
494    }
495  }
496  AdvanceSkipWhitespace();
497  // Allocate a fixed array with all the elements.
498  Handle<FixedArray> fast_elements =
499      factory()->NewFixedArray(elements.length(), pretenure_);
500  for (int i = 0, n = elements.length(); i < n; i++) {
501    fast_elements->set(i, *elements[i]);
502  }
503  Handle<Object> json_array = factory()->NewJSArrayWithElements(
504      fast_elements, FAST_ELEMENTS, pretenure_);
505  return scope.CloseAndEscape(json_array);
506}
507
508
509template <bool seq_one_byte>
510Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() {
511  bool negative = false;
512  int beg_pos = position_;
513  if (c0_ == '-') {
514    Advance();
515    negative = true;
516  }
517  if (c0_ == '0') {
518    Advance();
519    // Prefix zero is only allowed if it's the only digit before
520    // a decimal point or exponent.
521    if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
522  } else {
523    int i = 0;
524    int digits = 0;
525    if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
526    do {
527      i = i * 10 + c0_ - '0';
528      digits++;
529      Advance();
530    } while (c0_ >= '0' && c0_ <= '9');
531    if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
532      SkipWhitespace();
533      return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
534    }
535  }
536  if (c0_ == '.') {
537    Advance();
538    if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
539    do {
540      Advance();
541    } while (c0_ >= '0' && c0_ <= '9');
542  }
543  if (AsciiAlphaToLower(c0_) == 'e') {
544    Advance();
545    if (c0_ == '-' || c0_ == '+') Advance();
546    if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
547    do {
548      Advance();
549    } while (c0_ >= '0' && c0_ <= '9');
550  }
551  int length = position_ - beg_pos;
552  double number;
553  if (seq_one_byte) {
554    Vector<const uint8_t> chars(seq_source_->GetChars() +  beg_pos, length);
555    number = StringToDouble(isolate()->unicode_cache(),
556                            chars,
557                            NO_FLAGS,  // Hex, octal or trailing junk.
558                            base::OS::nan_value());
559  } else {
560    Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
561    String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
562    Vector<const uint8_t> result =
563        Vector<const uint8_t>(buffer.start(), length);
564    number = StringToDouble(isolate()->unicode_cache(),
565                            result,
566                            NO_FLAGS,  // Hex, octal or trailing junk.
567                            0.0);
568    buffer.Dispose();
569  }
570  SkipWhitespace();
571  return factory()->NewNumber(number, pretenure_);
572}
573
574
575template <typename StringType>
576inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
577
578template <>
579inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
580  seq_str->SeqTwoByteStringSet(i, c);
581}
582
583template <>
584inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) {
585  seq_str->SeqOneByteStringSet(i, c);
586}
587
588template <typename StringType>
589inline Handle<StringType> NewRawString(Factory* factory,
590                                       int length,
591                                       PretenureFlag pretenure);
592
593template <>
594inline Handle<SeqTwoByteString> NewRawString(Factory* factory,
595                                             int length,
596                                             PretenureFlag pretenure) {
597  return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked();
598}
599
600template <>
601inline Handle<SeqOneByteString> NewRawString(Factory* factory,
602                                           int length,
603                                           PretenureFlag pretenure) {
604  return factory->NewRawOneByteString(length, pretenure).ToHandleChecked();
605}
606
607
608// Scans the rest of a JSON string starting from position_ and writes
609// prefix[start..end] along with the scanned characters into a
610// sequential string of type StringType.
611template <bool seq_one_byte>
612template <typename StringType, typename SinkChar>
613Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString(
614    Handle<String> prefix, int start, int end) {
615  int count = end - start;
616  int max_length = count + source_length_ - position_;
617  int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
618  Handle<StringType> seq_string =
619      NewRawString<StringType>(factory(), length, pretenure_);
620  // Copy prefix into seq_str.
621  SinkChar* dest = seq_string->GetChars();
622  String::WriteToFlat(*prefix, dest, start, end);
623
624  while (c0_ != '"') {
625    // Check for control character (0x00-0x1f) or unterminated string (<0).
626    if (c0_ < 0x20) return Handle<String>::null();
627    if (count >= length) {
628      // We need to create a longer sequential string for the result.
629      return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count);
630    }
631    if (c0_ != '\\') {
632      // If the sink can contain UC16 characters, or source_ contains only
633      // Latin1 characters, there's no need to test whether we can store the
634      // character. Otherwise check whether the UC16 source character can fit
635      // in the Latin1 sink.
636      if (sizeof(SinkChar) == kUC16Size || seq_one_byte ||
637          c0_ <= String::kMaxOneByteCharCode) {
638        SeqStringSet(seq_string, count++, c0_);
639        Advance();
640      } else {
641        // StringType is SeqOneByteString and we just read a non-Latin1 char.
642        return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count);
643      }
644    } else {
645      Advance();  // Advance past the \.
646      switch (c0_) {
647        case '"':
648        case '\\':
649        case '/':
650          SeqStringSet(seq_string, count++, c0_);
651          break;
652        case 'b':
653          SeqStringSet(seq_string, count++, '\x08');
654          break;
655        case 'f':
656          SeqStringSet(seq_string, count++, '\x0c');
657          break;
658        case 'n':
659          SeqStringSet(seq_string, count++, '\x0a');
660          break;
661        case 'r':
662          SeqStringSet(seq_string, count++, '\x0d');
663          break;
664        case 't':
665          SeqStringSet(seq_string, count++, '\x09');
666          break;
667        case 'u': {
668          uc32 value = 0;
669          for (int i = 0; i < 4; i++) {
670            Advance();
671            int digit = HexValue(c0_);
672            if (digit < 0) {
673              return Handle<String>::null();
674            }
675            value = value * 16 + digit;
676          }
677          if (sizeof(SinkChar) == kUC16Size ||
678              value <= String::kMaxOneByteCharCode) {
679            SeqStringSet(seq_string, count++, value);
680            break;
681          } else {
682            // StringType is SeqOneByteString and we just read a non-Latin1
683            // char.
684            position_ -= 6;  // Rewind position_ to \ in \uxxxx.
685            Advance();
686            return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string,
687                                                              0,
688                                                              count);
689          }
690        }
691        default:
692          return Handle<String>::null();
693      }
694      Advance();
695    }
696  }
697
698  DCHECK_EQ('"', c0_);
699  // Advance past the last '"'.
700  AdvanceSkipWhitespace();
701
702  // Shrink seq_string length to count and return.
703  return SeqString::Truncate(seq_string, count);
704}
705
706
707template <bool seq_one_byte>
708template <bool is_internalized>
709Handle<String> JsonParser<seq_one_byte>::ScanJsonString() {
710  DCHECK_EQ('"', c0_);
711  Advance();
712  if (c0_ == '"') {
713    AdvanceSkipWhitespace();
714    return factory()->empty_string();
715  }
716
717  if (seq_one_byte && is_internalized) {
718    // Fast path for existing internalized strings.  If the the string being
719    // parsed is not a known internalized string, contains backslashes or
720    // unexpectedly reaches the end of string, return with an empty handle.
721    uint32_t running_hash = isolate()->heap()->HashSeed();
722    int position = position_;
723    uc32 c0 = c0_;
724    do {
725      if (c0 == '\\') {
726        c0_ = c0;
727        int beg_pos = position_;
728        position_ = position;
729        return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
730                                                             beg_pos,
731                                                             position_);
732      }
733      if (c0 < 0x20) return Handle<String>::null();
734      if (static_cast<uint32_t>(c0) >
735          unibrow::Utf16::kMaxNonSurrogateCharCode) {
736        running_hash =
737            StringHasher::AddCharacterCore(running_hash,
738                                           unibrow::Utf16::LeadSurrogate(c0));
739        running_hash =
740            StringHasher::AddCharacterCore(running_hash,
741                                           unibrow::Utf16::TrailSurrogate(c0));
742      } else {
743        running_hash = StringHasher::AddCharacterCore(running_hash, c0);
744      }
745      position++;
746      if (position >= source_length_) return Handle<String>::null();
747      c0 = seq_source_->SeqOneByteStringGet(position);
748    } while (c0 != '"');
749    int length = position - position_;
750    uint32_t hash = (length <= String::kMaxHashCalcLength)
751                        ? StringHasher::GetHashCore(running_hash)
752                        : static_cast<uint32_t>(length);
753    Vector<const uint8_t> string_vector(
754        seq_source_->GetChars() + position_, length);
755    StringTable* string_table = isolate()->heap()->string_table();
756    uint32_t capacity = string_table->Capacity();
757    uint32_t entry = StringTable::FirstProbe(hash, capacity);
758    uint32_t count = 1;
759    Handle<String> result;
760    while (true) {
761      Object* element = string_table->KeyAt(entry);
762      if (element == isolate()->heap()->undefined_value()) {
763        // Lookup failure.
764        result = factory()->InternalizeOneByteString(
765            seq_source_, position_, length);
766        break;
767      }
768      if (element != isolate()->heap()->the_hole_value() &&
769          String::cast(element)->IsOneByteEqualTo(string_vector)) {
770        result = Handle<String>(String::cast(element), isolate());
771#ifdef DEBUG
772        uint32_t hash_field =
773            (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
774        DCHECK_EQ(static_cast<int>(result->Hash()),
775                  static_cast<int>(hash_field >> String::kHashShift));
776#endif
777        break;
778      }
779      entry = StringTable::NextProbe(entry, count++, capacity);
780    }
781    position_ = position;
782    // Advance past the last '"'.
783    AdvanceSkipWhitespace();
784    return result;
785  }
786
787  int beg_pos = position_;
788  // Fast case for Latin1 only without escape characters.
789  do {
790    // Check for control character (0x00-0x1f) or unterminated string (<0).
791    if (c0_ < 0x20) return Handle<String>::null();
792    if (c0_ != '\\') {
793      if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) {
794        Advance();
795      } else {
796        return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
797                                                          beg_pos,
798                                                          position_);
799      }
800    } else {
801      return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
802                                                           beg_pos,
803                                                           position_);
804    }
805  } while (c0_ != '"');
806  int length = position_ - beg_pos;
807  Handle<String> result =
808      factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked();
809  uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
810  String::WriteToFlat(*source_, dest, beg_pos, position_);
811
812  DCHECK_EQ('"', c0_);
813  // Advance past the last '"'.
814  AdvanceSkipWhitespace();
815  return result;
816}
817
818} }  // namespace v8::internal
819
820#endif  // V8_JSON_PARSER_H_
821