scanner.h revision 9fac840a46e8b7e26894f4792ba26dde14c56b04
1// Copyright 2010 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#ifndef V8_SCANNER_H_ 29#define V8_SCANNER_H_ 30 31#include "token.h" 32#include "char-predicates-inl.h" 33#include "scanner-base.h" 34 35namespace v8 { 36namespace internal { 37 38// A buffered character stream based on a random access character 39// source (ReadBlock can be called with pos_ pointing to any position, 40// even positions before the current). 41class BufferedUC16CharacterStream: public UC16CharacterStream { 42 public: 43 BufferedUC16CharacterStream(); 44 virtual ~BufferedUC16CharacterStream(); 45 46 virtual void PushBack(uc16 character); 47 48 protected: 49 static const unsigned kBufferSize = 512; 50 static const unsigned kPushBackStepSize = 16; 51 52 virtual unsigned SlowSeekForward(unsigned delta); 53 virtual bool ReadBlock(); 54 virtual void SlowPushBack(uc16 character); 55 56 virtual unsigned BufferSeekForward(unsigned delta) = 0; 57 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; 58 59 const uc16* pushback_limit_; 60 uc16 buffer_[kBufferSize]; 61}; 62 63 64// Generic string stream. 65class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { 66 public: 67 GenericStringUC16CharacterStream(Handle<String> data, 68 unsigned start_position, 69 unsigned end_position); 70 virtual ~GenericStringUC16CharacterStream(); 71 72 protected: 73 virtual unsigned BufferSeekForward(unsigned delta); 74 virtual unsigned FillBuffer(unsigned position, unsigned length); 75 76 Handle<String> string_; 77 unsigned start_position_; 78 unsigned length_; 79}; 80 81 82// UC16 stream based on a literal UTF-8 string. 83class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { 84 public: 85 Utf8ToUC16CharacterStream(const byte* data, unsigned length); 86 virtual ~Utf8ToUC16CharacterStream(); 87 88 protected: 89 virtual unsigned BufferSeekForward(unsigned delta); 90 virtual unsigned FillBuffer(unsigned char_position, unsigned length); 91 void SetRawPosition(unsigned char_position); 92 93 const byte* raw_data_; 94 unsigned raw_data_length_; // Measured in bytes, not characters. 95 unsigned raw_data_pos_; 96 // The character position of the character at raw_data[raw_data_pos_]. 97 // Not necessarily the same as pos_. 98 unsigned raw_character_position_; 99}; 100 101 102// UTF16 buffer to read characters from an external string. 103class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { 104 public: 105 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, 106 int start_position, 107 int end_position); 108 virtual ~ExternalTwoByteStringUC16CharacterStream(); 109 110 virtual void PushBack(uc16 character) { 111 ASSERT(buffer_cursor_ > raw_data_); 112 buffer_cursor_--; 113 pos_--; 114 } 115 protected: 116 virtual unsigned SlowSeekForward(unsigned delta) { 117 // Fast case always handles seeking. 118 return 0; 119 } 120 virtual bool ReadBlock() { 121 // Entire string is read at start. 122 return false; 123 } 124 Handle<ExternalTwoByteString> source_; 125 const uc16* raw_data_; // Pointer to the actual array of characters. 126}; 127 128 129// ---------------------------------------------------------------------------- 130// V8JavaScriptScanner 131// JavaScript scanner getting its input from either a V8 String or a unicode 132// CharacterStream. 133 134class V8JavaScriptScanner : public JavaScriptScanner { 135 public: 136 V8JavaScriptScanner(); 137 void Initialize(UC16CharacterStream* source); 138}; 139 140 141class JsonScanner : public Scanner { 142 public: 143 JsonScanner(); 144 145 void Initialize(UC16CharacterStream* source); 146 147 // Returns the next token. 148 Token::Value Next(); 149 150 protected: 151 // Skip past JSON whitespace (only space, tab, newline and carrige-return). 152 bool SkipJsonWhiteSpace(); 153 154 // Scan a single JSON token. The JSON lexical grammar is specified in the 155 // ECMAScript 5 standard, section 15.12.1.1. 156 // Recognizes all of the single-character tokens directly, or calls a function 157 // to scan a number, string or identifier literal. 158 // The only allowed whitespace characters between tokens are tab, 159 // carriage-return, newline and space. 160 void ScanJson(); 161 162 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 163 // decimal number literals. 164 // It includes an optional minus sign, must have at least one 165 // digit before and after a decimal point, may not have prefixed zeros (unless 166 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 167 // Hexadecimal and octal numbers are not allowed. 168 Token::Value ScanJsonNumber(); 169 170 // A JSON string (production JSONString) is subset of valid JavaScript string 171 // literals. The string must only be double-quoted (not single-quoted), and 172 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 173 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 174 Token::Value ScanJsonString(); 175 176 // Used to recognizes one of the literals "true", "false", or "null". These 177 // are the only valid JSON identifiers (productions JSONBooleanLiteral, 178 // JSONNullLiteral). 179 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); 180}; 181 182} } // namespace v8::internal 183 184#endif // V8_SCANNER_H_ 185