// scanner.h revision 8a31eba00023874d4a1dcdc5f411cc4336776874
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_SCANNER_H_
#define V8_SCANNER_H_

#include "token.h"
#include "char-predicates-inl.h"
#include "scanner-base.h"

namespace v8 {
namespace internal {

// UTF16 buffer to read characters from a character stream.
39class CharacterStreamUTF16Buffer: public UTF16Buffer { 40 public: 41 CharacterStreamUTF16Buffer(); 42 virtual ~CharacterStreamUTF16Buffer() {} 43 void Initialize(Handle<String> data, 44 unibrow::CharacterStream* stream, 45 int start_position, 46 int end_position); 47 virtual void PushBack(uc32 ch); 48 virtual uc32 Advance(); 49 virtual void SeekForward(int pos); 50 51 private: 52 List<uc32> pushback_buffer_; 53 uc32 last_; 54 unibrow::CharacterStream* stream_; 55 56 List<uc32>* pushback_buffer() { return &pushback_buffer_; } 57}; 58 59 60// UTF16 buffer to read characters from an external string. 61template <typename StringType, typename CharType> 62class ExternalStringUTF16Buffer: public UTF16Buffer { 63 public: 64 ExternalStringUTF16Buffer(); 65 virtual ~ExternalStringUTF16Buffer() {} 66 void Initialize(Handle<StringType> data, 67 int start_position, 68 int end_position); 69 virtual void PushBack(uc32 ch); 70 virtual uc32 Advance(); 71 virtual void SeekForward(int pos); 72 73 private: 74 const CharType* raw_data_; // Pointer to the actual array of characters. 75}; 76 77 78// Initializes a UTF16Buffer as input stream, using one of a number 79// of strategies depending on the available character sources. 80class StreamInitializer { 81 public: 82 UTF16Buffer* Init(Handle<String> source, 83 unibrow::CharacterStream* stream, 84 int start_position, 85 int end_position); 86 private: 87 // Different UTF16 buffers used to pull characters from. Based on input one of 88 // these will be initialized as the actual data source. 89 CharacterStreamUTF16Buffer char_stream_buffer_; 90 ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> 91 two_byte_string_buffer_; 92 ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_; 93 94 // Used to convert the source string into a character stream when a stream 95 // is not passed to the scanner. 
96 SafeStringInputBuffer safe_string_input_buffer_; 97}; 98 99// ---------------------------------------------------------------------------- 100// V8JavaScriptScanner 101// JavaScript scanner getting its input from either a V8 String or a unicode 102// CharacterStream. 103 104class V8JavaScriptScanner : public JavaScriptScanner { 105 public: 106 V8JavaScriptScanner() {} 107 108 Token::Value NextCheckStack(); 109 110 // Initialize the Scanner to scan source. 111 void Initialize(Handle<String> source, int literal_flags = kAllLiterals); 112 void Initialize(Handle<String> source, 113 unibrow::CharacterStream* stream, 114 int literal_flags = kAllLiterals); 115 void Initialize(Handle<String> source, 116 int start_position, int end_position, 117 int literal_flags = kAllLiterals); 118 119 protected: 120 StreamInitializer stream_initializer_; 121}; 122 123 124class JsonScanner : public Scanner { 125 public: 126 JsonScanner(); 127 128 // Initialize the Scanner to scan source. 129 void Initialize(Handle<String> source); 130 131 // Returns the next token. 132 Token::Value Next(); 133 134 protected: 135 // Skip past JSON whitespace (only space, tab, newline and carrige-return). 136 bool SkipJsonWhiteSpace(); 137 138 // Scan a single JSON token. The JSON lexical grammar is specified in the 139 // ECMAScript 5 standard, section 15.12.1.1. 140 // Recognizes all of the single-character tokens directly, or calls a function 141 // to scan a number, string or identifier literal. 142 // The only allowed whitespace characters between tokens are tab, 143 // carrige-return, newline and space. 144 void ScanJson(); 145 146 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 147 // decimal number literals. 148 // It includes an optional minus sign, must have at least one 149 // digit before and after a decimal point, may not have prefixed zeros (unless 150 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 
151 // Hexadecimal and octal numbers are not allowed. 152 Token::Value ScanJsonNumber(); 153 154 // A JSON string (production JSONString) is subset of valid JavaScript string 155 // literals. The string must only be double-quoted (not single-quoted), and 156 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 157 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 158 Token::Value ScanJsonString(); 159 160 // Used to recognizes one of the literals "true", "false", or "null". These 161 // are the only valid JSON identifiers (productions JSONBooleanLiteral, 162 // JSONNullLiteral). 163 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); 164 165 StreamInitializer stream_initializer_; 166}; 167 168 169// ExternalStringUTF16Buffer 170template <typename StringType, typename CharType> 171ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer() 172 : raw_data_(NULL) { } 173 174 175template <typename StringType, typename CharType> 176void ExternalStringUTF16Buffer<StringType, CharType>::Initialize( 177 Handle<StringType> data, 178 int start_position, 179 int end_position) { 180 ASSERT(!data.is_null()); 181 raw_data_ = data->resource()->data(); 182 183 ASSERT(end_position <= data->length()); 184 if (start_position > 0) { 185 SeekForward(start_position); 186 } 187 end_ = 188 end_position != kNoEndPosition ? end_position : data->length(); 189} 190 191 192template <typename StringType, typename CharType> 193uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() { 194 if (pos_ < end_) { 195 return raw_data_[pos_++]; 196 } else { 197 // note: currently the following increment is necessary to avoid a 198 // test-parser problem! 
199 pos_++; 200 return static_cast<uc32>(-1); 201 } 202} 203 204 205template <typename StringType, typename CharType> 206void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) { 207 pos_--; 208 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); 209 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); 210} 211 212 213template <typename StringType, typename CharType> 214void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) { 215 pos_ = pos; 216} 217 218} } // namespace v8::internal 219 220#endif // V8_SCANNER_H_ 221