// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Features shared by parsing and pre-parsing scanners.
29589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 30a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#ifndef V8_SCANNER_H_ 31a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define V8_SCANNER_H_ 32a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 33589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "allocation.h" 34589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "char-predicates.h" 35589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "checks.h" 36589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "globals.h" 37a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#include "token.h" 38589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "unicode-inl.h" 39589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch#include "utils.h" 40a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 41a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace v8 { 42a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace internal { 43a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 443ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 453ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// General collection of (multi-)bit-flags that can be passed to scanners and 463ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// parsers to signify their (initial) mode of operation. 
473ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochenum ParsingFlags { 483ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch kNoParsingFlags = 0, 493ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Embed LanguageMode values in parsing flags, i.e., equivalent to: 503ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // CLASSIC_MODE = 0, 513ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // STRICT_MODE, 523ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // EXTENDED_MODE, 533ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch kLanguageModeMask = 0x03, 543ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch kAllowLazy = 0x04, 553ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch kAllowNativesSyntax = 0x08, 563ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch kAllowModules = 0x10 573ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch}; 583ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 593ef787dbeca8a5fb1086949cda830dccee07bfbdBen MurdochSTATIC_ASSERT((kLanguageModeMask & CLASSIC_MODE) == CLASSIC_MODE); 603ef787dbeca8a5fb1086949cda830dccee07bfbdBen MurdochSTATIC_ASSERT((kLanguageModeMask & STRICT_MODE) == STRICT_MODE); 613ef787dbeca8a5fb1086949cda830dccee07bfbdBen MurdochSTATIC_ASSERT((kLanguageModeMask & EXTENDED_MODE) == EXTENDED_MODE); 623ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 633ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 64589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// Returns the value (0 .. 15) of a hexadecimal character c. 65589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// If c is not a legal hexadecimal character, returns a value < 0. 66589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdochinline int HexValue(uc32 c) { 67589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch c -= '0'; 68589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (static_cast<unsigned>(c) <= 9) return c; 69589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. 
70589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (static_cast<unsigned>(c) <= 5) return c + 10; 71589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return -1; 72589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch} 73589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 74589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 75589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// --------------------------------------------------------------------- 763ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 773ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// A code unit is a 16 bit value representing either a 16 bit code point 783ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// or one part of a surrogate pair that make a single 21 bit code point. 79589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 803ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochclass Utf16CharacterStream { 81a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 823ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Utf16CharacterStream() : pos_(0) { } 833ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch virtual ~Utf16CharacterStream() { } 84589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 853ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns and advances past the next UTF-16 code unit in the input 863ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // stream. If there are no more code units, it returns a negative 87589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // value. 
88589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline uc32 Advance() { 89589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 90589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch pos_++; 91589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return static_cast<uc32>(*(buffer_cursor_++)); 92589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 93589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Note: currently the following increment is necessary to avoid a 94589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // parser problem! The scanner treats the final kEndOfInput as 953ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // a code unit with a position, and does math relative to that 96589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // position. 97589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch pos_++; 98589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 99589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return kEndOfInput; 100589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 101b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 1023ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Return the current position in the code unit stream. 103589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Starts at zero. 104589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline unsigned pos() const { return pos_; } 105589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1063ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Skips forward past the next code_unit_count UTF-16 code units 107589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // in the input, or until the end of input if that comes sooner. 1083ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns the number of code units actually skipped. 
If less 1093ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // than code_unit_count, 1103ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch inline unsigned SeekForward(unsigned code_unit_count) { 111589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unsigned buffered_chars = 112589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static_cast<unsigned>(buffer_end_ - buffer_cursor_); 1133ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch if (code_unit_count <= buffered_chars) { 1143ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch buffer_cursor_ += code_unit_count; 1153ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch pos_ += code_unit_count; 1163ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return code_unit_count; 117589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 1183ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return SlowSeekForward(code_unit_count); 119589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 120589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1213ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Pushes back the most recently read UTF-16 code unit (or negative 122589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // value if at end of input), i.e., the value returned by the most recent 123589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // call to Advance. 124589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Must not be used right after calling SeekForward. 
1253ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch virtual void PushBack(int32_t code_unit) = 0; 126b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 127b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch protected: 128589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const uc32 kEndOfInput = -1; 129589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1303ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Ensures that the buffer_cursor_ points to the code_unit at 131589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // position pos_ of the input, if possible. If the position 132589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // is at or after the end of the input, return false. If there 1333ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // are more code_units available, return true. 134589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch virtual bool ReadBlock() = 0; 1353ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0; 136589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 137589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch const uc16* buffer_cursor_; 138589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch const uc16* buffer_end_; 139589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unsigned pos_; 140589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch}; 141589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 142b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 143589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdochclass UnicodeCache { 144589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// --------------------------------------------------------------------- 145589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// Caching predicates used by scanners. 
146589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch public: 147589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch UnicodeCache() {} 148589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 149589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 150589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch StaticResource<Utf8Decoder>* utf8_decoder() { 151589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return &utf8_decoder_; 152589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 153589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 154589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } 155589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } 156589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } 157589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } 158b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 159589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch private: 160589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 161589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 162589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 163589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 164589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch StaticResource<Utf8Decoder> utf8_decoder_; 165b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 166589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch DISALLOW_COPY_AND_ASSIGN(UnicodeCache); 167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block}; 168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 170589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// ---------------------------------------------------------------------------- 171589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// LiteralBuffer - Collector of chars of literals. 172589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 173589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdochclass LiteralBuffer { 174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 175589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } 176b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 177589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ~LiteralBuffer() { 178589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (backing_store_.length() > 0) { 179589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch backing_store_.Dispose(); 180589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 181589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 182589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1833ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch INLINE(void AddChar(uint32_t code_unit)) { 184589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (position_ >= backing_store_.length()) ExpandBuffer(); 185589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (is_ascii_) { 1863ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch if (code_unit < kMaxAsciiCharCodeU) { 1873ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch backing_store_[position_] = static_cast<byte>(code_unit); 188589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_ += kASCIISize; 189589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return; 190589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 1913ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch ConvertToUtf16(); 192589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 
1933ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch ASSERT(code_unit < 0x10000u); 1943ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; 195589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_ += kUC16Size; 196589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 197589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 198589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool is_ascii() { return is_ascii_; } 199589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 2003ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Vector<const uc16> utf16_literal() { 201589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT(!is_ascii_); 202589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT((position_ & 0x1) == 0); 203589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return Vector<const uc16>( 204589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch reinterpret_cast<const uc16*>(backing_store_.start()), 205589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_ >> 1); 206589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 207589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 208589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Vector<const char> ascii_literal() { 209589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT(is_ascii_); 210589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return Vector<const char>( 211589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch reinterpret_cast<const char*>(backing_store_.start()), 212589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_); 213589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 214b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 215589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int length() { 216589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return is_ascii_ ? 
position_ : (position_ >> 1); 217589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 218589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 219589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void Reset() { 220589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_ = 0; 221589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch is_ascii_ = true; 222589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 223589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 224589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch private: 225589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const int kInitialCapacity = 16; 226589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const int kGrowthFactory = 4; 227589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const int kMinConversionSlack = 256; 228589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const int kMaxGrowth = 1 * MB; 229589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline int NewCapacity(int min_capacity) { 230589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int capacity = Max(min_capacity, backing_store_.length()); 231589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); 232589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return new_capacity; 233589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 234589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 235589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void ExpandBuffer() { 236589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); 237589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch memcpy(new_store.start(), backing_store_.start(), position_); 238589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch backing_store_.Dispose(); 239589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch backing_store_ = new_store; 240589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 
241589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 2423ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void ConvertToUtf16() { 243589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT(is_ascii_); 244589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Vector<byte> new_store; 245589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int new_content_size = position_ * kUC16Size; 246589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (new_content_size >= backing_store_.length()) { 2473ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Ensure room for all currently read code units as UC16 as well 2483ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // as the code unit about to be stored. 249589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch new_store = Vector<byte>::New(NewCapacity(new_content_size)); 250589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } else { 251589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch new_store = backing_store_; 252589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 253589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch char* src = reinterpret_cast<char*>(backing_store_.start()); 254589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch uc16* dst = reinterpret_cast<uc16*>(new_store.start()); 255589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch for (int i = position_ - 1; i >= 0; i--) { 256589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch dst[i] = src[i]; 257589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 258589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (new_store.start() != backing_store_.start()) { 259589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch backing_store_.Dispose(); 260589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch backing_store_ = new_store; 261589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 262589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch position_ = new_content_size; 263589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch is_ascii_ = false; 
264589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 265589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 266589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool is_ascii_; 267589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int position_; 268589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Vector<byte> backing_store_; 269589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 270589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); 271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 274589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch// ---------------------------------------------------------------------------- 2753ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// JavaScript Scanner. 276589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 277589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdochclass Scanner { 278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 2793ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Scoped helper for literal recording. Automatically drops the literal 2803ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // if aborting the scanning before it's complete. 
281589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch class LiteralScope { 282589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch public: 2833ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch explicit LiteralScope(Scanner* self) 2843ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch : scanner_(self), complete_(false) { 2853ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch scanner_->StartLiteral(); 2863ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 2873ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch ~LiteralScope() { 2883ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch if (!complete_) scanner_->DropLiteral(); 2893ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 2903ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void Complete() { 2913ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch scanner_->TerminateLiteral(); 2923ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch complete_ = true; 2933ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 294589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 295589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch private: 296589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Scanner* scanner_; 297589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool complete_; 298589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch }; 299589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 3003ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Representation of an interval of source positions. 
301589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch struct Location { 302589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location(int b, int e) : beg_pos(b), end_pos(e) { } 303589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location() : beg_pos(0), end_pos(0) { } 304589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 305589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool IsValid() const { 306589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return beg_pos >= 0 && end_pos >= beg_pos; 307589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 308589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 309589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static Location invalid() { return Location(-1, -1); } 310589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 311589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int beg_pos; 312589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int end_pos; 313589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch }; 314589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 3153ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // -1 is outside of the range of any real source code. 3163ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch static const int kNoOctalLocation = -1; 3173ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3183ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 3193ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3203ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch explicit Scanner(UnicodeCache* scanner_contants); 3213ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3223ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void Initialize(Utf16CharacterStream* source); 3233ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3243ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns the next token and advances input. 
3253ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Token::Value Next(); 3263ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns the current token again. 3273ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Token::Value current_token() { return current_.token; } 328589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Returns the location information for the current token 3293ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // (the token last returned by Next()). 330589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location location() const { return current_.location; } 331589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Returns the literal string, if any, for the current token (the 3323ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // token last returned by Next()). The string is 0-terminated. 3333ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Literal strings are collected for identifiers, strings, and 3343ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // numbers. 335589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // These functions only give the correct result if the literal 336589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // was scanned between calls to StartLiteral() and TerminateLiteral(). 
33785b71799222b55eb5dd74ea26efe0c64ab655c8cBen Murdoch Vector<const char> literal_ascii_string() { 338589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT_NOT_NULL(current_.literal_chars); 33985b71799222b55eb5dd74ea26efe0c64ab655c8cBen Murdoch return current_.literal_chars->ascii_literal(); 340589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 3413ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Vector<const uc16> literal_utf16_string() { 342592a9fc1d8ea420377a2e7efd0600e20b058be2bBen Murdoch ASSERT_NOT_NULL(current_.literal_chars); 3433ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return current_.literal_chars->utf16_literal(); 3443ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 3453ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool is_literal_ascii() { 3463ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch ASSERT_NOT_NULL(current_.literal_chars); 3473ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return current_.literal_chars->is_ascii(); 348592a9fc1d8ea420377a2e7efd0600e20b058be2bBen Murdoch } 349589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int literal_length() const { 350589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT_NOT_NULL(current_.literal_chars); 351589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return current_.literal_chars->length(); 352589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 353589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 354589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool literal_contains_escapes() const { 355589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location location = current_.location; 356589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int source_length = (location.end_pos - location.beg_pos); 357589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (current_.token == Token::STRING) { 358589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Subtract delimiters. 
359589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch source_length -= 2; 360589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 361589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return current_.literal_chars->length() != source_length; 362589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 363589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 3643ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Similar functions for the upcoming token. 3653ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3663ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // One token look-ahead (past the token returned by Next()). 3673ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Token::Value peek() const { return next_.token; } 3683ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3693ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Location peek_location() const { return next_.location; } 3703ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 371589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Returns the literal string for the next token (the token that 372589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // would be returned if Next() were called). 
37385b71799222b55eb5dd74ea26efe0c64ab655c8cBen Murdoch Vector<const char> next_literal_ascii_string() { 374589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT_NOT_NULL(next_.literal_chars); 37585b71799222b55eb5dd74ea26efe0c64ab655c8cBen Murdoch return next_.literal_chars->ascii_literal(); 376589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 3773ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Vector<const uc16> next_literal_utf16_string() { 378592a9fc1d8ea420377a2e7efd0600e20b058be2bBen Murdoch ASSERT_NOT_NULL(next_.literal_chars); 3793ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return next_.literal_chars->utf16_literal(); 3803ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 3813ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool is_next_literal_ascii() { 3823ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch ASSERT_NOT_NULL(next_.literal_chars); 3833ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return next_.literal_chars->is_ascii(); 384592a9fc1d8ea420377a2e7efd0600e20b058be2bBen Murdoch } 385589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int next_literal_length() const { 386589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT_NOT_NULL(next_.literal_chars); 387589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return next_.literal_chars->length(); 388589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 389589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 390589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch UnicodeCache* unicode_cache() { return unicode_cache_; } 391589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 392589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch static const int kCharacterLookaheadBufferSize = 1; 393b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 3943ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 
3953ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch uc32 ScanOctalEscape(uc32 c, int length); 3963ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 3973ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns the location of the last seen octal literal. 3983ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Location octal_position() const { return octal_pos_; } 3993ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void clear_octal_position() { octal_pos_ = Location::invalid(); } 4003ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4013ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Seek forward to the given position. This operation does not 4023ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // work in general, for instance when there are pushed back 4033ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // characters, but works for seeking forward until simple delimiter 4043ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // tokens, which is what it is used for. 4053ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void SeekForward(int pos); 4063ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4073ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool HarmonyScoping() const { 4083ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return harmony_scoping_; 4093ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 4103ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void SetHarmonyScoping(bool scoping) { 4113ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch harmony_scoping_ = scoping; 4123ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 4133ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool HarmonyModules() const { 4143ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return harmony_modules_; 4153ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 4163ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void SetHarmonyModules(bool modules) { 4173ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch harmony_modules_ = modules; 
4183ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 4193ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4203ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4213ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns true if there was a line terminator before the peek'ed token, 4223ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // possibly inside a multi-line comment. 4233ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool HasAnyLineTerminatorBeforeNext() const { 4243ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return has_line_terminator_before_next_ || 4253ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch has_multiline_comment_before_next_; 4263ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 4273ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4283ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Scans the input as a regular expression pattern, previous 4293ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // character(s) must be /(=). Returns true if a pattern is scanned. 4303ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool ScanRegExpPattern(bool seen_equal); 4313ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Returns true if regexp flags are scanned (always since flags can 4323ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // be empty). 4333ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool ScanRegExpFlags(); 4343ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4353ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Tells whether the buffer contains an identifier (no escapes). 4363ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Used for checking if a property name is an identifier. 
4373ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch static bool IsIdentifier(unibrow::CharacterStream* buffer); 4383ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 4393ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch private: 440589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // The current and look-ahead token. 441589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch struct TokenDesc { 442589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value token; 443589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location location; 444589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch LiteralBuffer* literal_chars; 445589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch }; 446589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 447589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Call this after setting source_ to the input. 448589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void Init() { 449589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Set c0_ (one character ahead) 450589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); 451589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Advance(); 452589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Initialize current_ to not refer to a literal. 453589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch current_.literal_chars = NULL; 454589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 455589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 456589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Literal buffer support 457589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline void StartLiteral() { 458589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? 
459589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch &literal_buffer2_ : &literal_buffer1_; 460589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch free_buffer->Reset(); 461589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch next_.literal_chars = free_buffer; 462589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 463589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 4643ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch INLINE(void AddLiteralChar(uc32 c)) { 465589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch ASSERT_NOT_NULL(next_.literal_chars); 466589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch next_.literal_chars->AddChar(c); 467589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 468589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 469589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Complete scanning of a literal. 470589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline void TerminateLiteral() { 471589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Does nothing in the current implementation. 472589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 473589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 474589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Stops scanning of a literal and drop the collected characters, 475589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // e.g., due to an encountered error. 
476589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline void DropLiteral() { 477589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch next_.literal_chars = NULL; 478589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 479589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 480589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline void AddLiteralCharAdvance() { 481589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch AddLiteralChar(c0_); 482589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Advance(); 483589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 484589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 485589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Low-level scanning support. 486589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void Advance() { c0_ = source_->Advance(); } 487589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void PushBack(uc32 ch) { 488589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch source_->PushBack(c0_); 489589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch c0_ = ch; 490589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 491589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 492589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline Token::Value Select(Token::Value tok) { 493589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Advance(); 494589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return tok; 495589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 496589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 497589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 498589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Advance(); 499589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch if (c0_ == next) { 500589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Advance(); 501589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return then; 502589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } else { 
503589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch return else_; 504589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 505589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch } 506589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 507589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch uc32 ScanHexNumber(int expected_length); 508589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 5093ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Scans a single JavaScript token. 5103ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch void Scan(); 51185b71799222b55eb5dd74ea26efe0c64ab655c8cBen Murdoch 512589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool SkipWhiteSpace(); 513589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value SkipSingleLineComment(); 514589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value SkipMultiLineComment(); 5153ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Scans a possible HTML comment -- begins with '<!'. 5163ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Token::Value ScanHtmlComment(); 517589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 518589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void ScanDecimalDigits(); 519589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value ScanNumber(bool seen_period); 520589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value ScanIdentifierOrKeyword(); 521589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value ScanIdentifierSuffix(LiteralScope* literal); 522589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 523589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch void ScanEscape(); 524589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Token::Value ScanString(); 525589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 526589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Decodes a unicode escape-sequence which is part of an identifier. 
527589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // If the escape sequence cannot be decoded the result is kBadChar. 528589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch uc32 ScanIdentifierUnicodeEscape(); 529589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Recognizes a uniocde escape-sequence and adds its characters, 530589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // uninterpreted, to the current literal. Used for parsing RegExp 531589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // flags. 532589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool ScanLiteralUnicodeEscape(); 533589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 5343ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Return the current source position. 5353ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch int source_pos() { 5363ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return source_->pos() - kCharacterLookaheadBufferSize; 5373ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch } 5383ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 5393ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch UnicodeCache* unicode_cache_; 5403ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 5413ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Buffers collecting literal strings, numbers, etc. 5423ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch LiteralBuffer literal_buffer1_; 5433ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch LiteralBuffer literal_buffer2_; 5443ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 5453ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch TokenDesc current_; // desc for current token (as returned by Next()) 5463ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch TokenDesc next_; // desc for next token (one token look-ahead) 5473ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 5483ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Input stream. Must be initialized to an Utf16CharacterStream. 
5493ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch Utf16CharacterStream* source_; 5503ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 5513ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 552589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Start position of the octal literal last scanned. 553589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch Location octal_pos_; 554589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 5553ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // One Unicode character look-ahead; c0_ < 0 at the end of the input. 5563ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch uc32 c0_; 5573ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch 558589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Whether there is a line terminator whitespace character after 559589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // the current token, and before the next. Does not count newlines 560589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // inside multiline comments. 561589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool has_line_terminator_before_next_; 562589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // Whether there is a multi-line comment that contains a 563589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch // line-terminator after the current token, and before the next. 564589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch bool has_multiline_comment_before_next_; 5653ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Whether we scan 'let' as a keyword for harmony block-scoped let bindings. 5663ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool harmony_scoping_; 5673ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // Whether we scan 'module', 'import', 'export' as keywords. 
5683ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch bool harmony_modules_; 569b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch}; 570b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} } // namespace v8::internal 572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif // V8_SCANNER_H_ 574