17d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// Copyright 2011 the V8 project authors. All rights reserved.
27d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
37d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// found in the LICENSE file.
47d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
57d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#ifndef V8_SCANNER_CHARACTER_STREAMS_H_
67d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#define V8_SCANNER_CHARACTER_STREAMS_H_
77d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
87d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "src/scanner.h"
97d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
107d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)namespace v8 {
117d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)namespace internal {
127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
137d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// A buffered character stream based on a random access character
147d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// source (ReadBlock can be called with pos_ pointing to any position,
157d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// even positions before the current).
167d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)class BufferedUtf16CharacterStream: public Utf16CharacterStream {
177d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) public:
18eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  BufferedUtf16CharacterStream();
197d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual ~BufferedUtf16CharacterStream();
207d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
21eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual void PushBack(uc32 character);
22eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
23eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch protected:
24eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  static const unsigned kBufferSize = 512;
25eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  static const unsigned kPushBackStepSize = 16;
26eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
27eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual unsigned SlowSeekForward(unsigned delta);
28eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual bool ReadBlock();
29eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual void SlowPushBack(uc16 character);
30eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
31eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual unsigned BufferSeekForward(unsigned delta) = 0;
32eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual unsigned FillBuffer(unsigned position) = 0;
33eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
34eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  const uc16* pushback_limit_;
35eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  uc16 buffer_[kBufferSize];
367d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)};
377d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
387d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
39eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch// Generic string stream.
40eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochclass GenericStringUtf16CharacterStream: public BufferedUtf16CharacterStream {
41eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch public:
427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  GenericStringUtf16CharacterStream(Handle<String> data,
437d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)                                    unsigned start_position,
44eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch                                    unsigned end_position);
45eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual ~GenericStringUtf16CharacterStream();
467d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
47eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch protected:
487d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual unsigned BufferSeekForward(unsigned delta);
497d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual unsigned FillBuffer(unsigned position);
507d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
517d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  Handle<String> string_;
527d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned length_;
537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)};
547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// Utf16 stream based on a literal UTF-8 string.
577d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) public:
597d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
607d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual ~Utf8ToUtf16CharacterStream();
617d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
627d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  static unsigned CopyChars(uint16_t* dest, unsigned length, const byte* src,
637d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)                            unsigned* src_pos, unsigned src_length);
647d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
657d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) protected:
667d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual unsigned BufferSeekForward(unsigned delta);
677d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual unsigned FillBuffer(unsigned char_position);
687d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  void SetRawPosition(unsigned char_position);
697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  const byte* raw_data_;
717d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned raw_data_length_;  // Measured in bytes, not characters.
727d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned raw_data_pos_;
737d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  // The character position of the character at raw_data[raw_data_pos_].
747d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  // Not necessarily the same as pos_.
757d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned raw_character_position_;
767d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)};
777d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
787d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
797d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// ExternalStreamingStream is a wrapper around an ExternalSourceStream (see
807d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// include/v8.h) subclass implemented by the embedder.
817d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)class ExternalStreamingStream : public BufferedUtf16CharacterStream {
827d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) public:
837d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  ExternalStreamingStream(ScriptCompiler::ExternalSourceStream* source_stream,
847d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)                          v8::ScriptCompiler::StreamedSource::Encoding encoding)
857d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)      : source_stream_(source_stream),
867d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)        encoding_(encoding),
877d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)        current_data_(NULL),
887d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)        current_data_offset_(0),
897d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)        current_data_length_(0),
90eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch        utf8_split_char_buffer_length_(0) {}
91eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
92eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual ~ExternalStreamingStream() { delete[] current_data_; }
93eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
94eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch  virtual unsigned BufferSeekForward(unsigned delta) OVERRIDE {
957d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    // We never need to seek forward when streaming scripts. We only seek
967d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    // forward when we want to parse a function whose location we already know,
977d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    // and when streaming, we don't know the locations of anything we haven't
987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    // seen yet.
997d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    UNREACHABLE();
1007d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    return 0;
1017d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  }
1027d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
1037d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  virtual unsigned FillBuffer(unsigned position);
1047d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
1057d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) private:
1067d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  void HandleUtf8SplitCharacters(unsigned* data_in_buffer);
1077d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
1087d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  ScriptCompiler::ExternalSourceStream* source_stream_;
1097d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  v8::ScriptCompiler::StreamedSource::Encoding encoding_;
1107d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  const uint8_t* current_data_;
1117d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned current_data_offset_;
1127d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned current_data_length_;
1137d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  // For converting UTF-8 characters which are split across two data chunks.
1147d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  uint8_t utf8_split_char_buffer_[4];
1157d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  unsigned utf8_split_char_buffer_length_;
1167d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)};
1177d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
1187d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
1197d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)// UTF16 buffer to read characters from an external string.
1207d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
1217d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) public:
1227d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  ExternalTwoByteStringUtf16CharacterStream(Handle<ExternalTwoByteString> data,
123                                            int start_position,
124                                            int end_position);
125  virtual ~ExternalTwoByteStringUtf16CharacterStream();
126
127  virtual void PushBack(uc32 character) {
128    DCHECK(buffer_cursor_ > raw_data_);
129    buffer_cursor_--;
130    pos_--;
131  }
132
133 protected:
134  virtual unsigned SlowSeekForward(unsigned delta) {
135    // Fast case always handles seeking.
136    return 0;
137  }
138  virtual bool ReadBlock() {
139    // Entire string is read at start.
140    return false;
141  }
142  Handle<ExternalTwoByteString> source_;
143  const uc16* raw_data_;  // Pointer to the actual array of characters.
144};
145
146} }  // namespace v8::internal
147
148#endif  // V8_SCANNER_CHARACTER_STREAMS_H_
149