// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_SCANNER_H_
#define V8_SCANNER_H_

#include "token.h"
#include "char-predicates-inl.h"
#include "scanner-base.h"

namespace v8 {
namespace internal {

// A buffered character stream based on a random access character
// source (ReadBlock can be called with pos_ pointing to any position,
// even positions before the current).
41class BufferedUC16CharacterStream: public UC16CharacterStream { 42 public: 43 BufferedUC16CharacterStream(); 44 virtual ~BufferedUC16CharacterStream(); 45 46 virtual void PushBack(uc32 character); 47 48 protected: 49 static const unsigned kBufferSize = 512; 50 static const unsigned kPushBackStepSize = 16; 51 52 virtual unsigned SlowSeekForward(unsigned delta); 53 virtual bool ReadBlock(); 54 virtual void SlowPushBack(uc16 character); 55 56 virtual unsigned BufferSeekForward(unsigned delta) = 0; 57 virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; 58 59 const uc16* pushback_limit_; 60 uc16 buffer_[kBufferSize]; 61}; 62 63 64// Generic string stream. 65class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { 66 public: 67 GenericStringUC16CharacterStream(Handle<String> data, 68 unsigned start_position, 69 unsigned end_position); 70 virtual ~GenericStringUC16CharacterStream(); 71 72 protected: 73 virtual unsigned BufferSeekForward(unsigned delta); 74 virtual unsigned FillBuffer(unsigned position, unsigned length); 75 76 Handle<String> string_; 77 unsigned start_position_; 78 unsigned length_; 79}; 80 81 82// UC16 stream based on a literal UTF-8 string. 83class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { 84 public: 85 Utf8ToUC16CharacterStream(const byte* data, unsigned length); 86 virtual ~Utf8ToUC16CharacterStream(); 87 88 protected: 89 virtual unsigned BufferSeekForward(unsigned delta); 90 virtual unsigned FillBuffer(unsigned char_position, unsigned length); 91 void SetRawPosition(unsigned char_position); 92 93 const byte* raw_data_; 94 unsigned raw_data_length_; // Measured in bytes, not characters. 95 unsigned raw_data_pos_; 96 // The character position of the character at raw_data[raw_data_pos_]. 97 // Not necessarily the same as pos_. 98 unsigned raw_character_position_; 99}; 100 101 102// UTF16 buffer to read characters from an external string. 
103class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { 104 public: 105 ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data, 106 int start_position, 107 int end_position); 108 virtual ~ExternalTwoByteStringUC16CharacterStream(); 109 110 virtual void PushBack(uc32 character) { 111 ASSERT(buffer_cursor_ > raw_data_); 112 buffer_cursor_--; 113 pos_--; 114 } 115 116 protected: 117 virtual unsigned SlowSeekForward(unsigned delta) { 118 // Fast case always handles seeking. 119 return 0; 120 } 121 virtual bool ReadBlock() { 122 // Entire string is read at start. 123 return false; 124 } 125 Handle<ExternalTwoByteString> source_; 126 const uc16* raw_data_; // Pointer to the actual array of characters. 127}; 128 129 130// ---------------------------------------------------------------------------- 131// V8JavaScriptScanner 132// JavaScript scanner getting its input from either a V8 String or a unicode 133// CharacterStream. 134 135class V8JavaScriptScanner : public JavaScriptScanner { 136 public: 137 explicit V8JavaScriptScanner(UnicodeCache* unicode_cache) 138 : JavaScriptScanner(unicode_cache) {} 139 140 void Initialize(UC16CharacterStream* source); 141}; 142 143 144class JsonScanner : public Scanner { 145 public: 146 explicit JsonScanner(UnicodeCache* unicode_cache); 147 148 void Initialize(UC16CharacterStream* source); 149 150 // Returns the next token. 151 Token::Value Next(); 152 153 // Returns the value of a number token. 154 double number() { 155 return number_; 156 } 157 158 159 protected: 160 // Skip past JSON whitespace (only space, tab, newline and carrige-return). 161 bool SkipJsonWhiteSpace(); 162 163 // Scan a single JSON token. The JSON lexical grammar is specified in the 164 // ECMAScript 5 standard, section 15.12.1.1. 165 // Recognizes all of the single-character tokens directly, or calls a function 166 // to scan a number, string or identifier literal. 
167 // The only allowed whitespace characters between tokens are tab, 168 // carriage-return, newline and space. 169 void ScanJson(); 170 171 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 172 // decimal number literals. 173 // It includes an optional minus sign, must have at least one 174 // digit before and after a decimal point, may not have prefixed zeros (unless 175 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 176 // Hexadecimal and octal numbers are not allowed. 177 Token::Value ScanJsonNumber(); 178 179 // A JSON string (production JSONString) is subset of valid JavaScript string 180 // literals. The string must only be double-quoted (not single-quoted), and 181 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 182 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 183 Token::Value ScanJsonString(); 184 185 // Used to recognizes one of the literals "true", "false", or "null". These 186 // are the only valid JSON identifiers (productions JSONBooleanLiteral, 187 // JSONNullLiteral). 188 Token::Value ScanJsonIdentifier(const char* text, Token::Value token); 189 190 // Holds the value of a scanned number token. 191 double number_; 192}; 193 194} } // namespace v8::internal 195 196#endif // V8_SCANNER_H_ 197