1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31
32#include "v8.h"
33
34#include "cctest.h"
35#include "compiler.h"
36#include "execution.h"
37#include "isolate.h"
38#include "parser.h"
39#include "preparser.h"
40#include "scanner-character-streams.h"
41#include "token.h"
42#include "utils.h"
43
44TEST(ScanKeywords) {
45  struct KeywordToken {
46    const char* keyword;
47    i::Token::Value token;
48  };
49
50  static const KeywordToken keywords[] = {
51#define KEYWORD(t, s, d) { s, i::Token::t },
52      TOKEN_LIST(IGNORE_TOKEN, KEYWORD)
53#undef KEYWORD
54      { NULL, i::Token::IDENTIFIER }
55  };
56
57  KeywordToken key_token;
58  i::UnicodeCache unicode_cache;
59  i::byte buffer[32];
60  for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
61    const i::byte* keyword =
62        reinterpret_cast<const i::byte*>(key_token.keyword);
63    int length = i::StrLength(key_token.keyword);
64    CHECK(static_cast<int>(sizeof(buffer)) >= length);
65    {
66      i::Utf8ToUtf16CharacterStream stream(keyword, length);
67      i::Scanner scanner(&unicode_cache);
68      // The scanner should parse Harmony keywords for this test.
69      scanner.SetHarmonyScoping(true);
70      scanner.SetHarmonyModules(true);
71      scanner.Initialize(&stream);
72      CHECK_EQ(key_token.token, scanner.Next());
73      CHECK_EQ(i::Token::EOS, scanner.Next());
74    }
75    // Removing characters will make keyword matching fail.
76    {
77      i::Utf8ToUtf16CharacterStream stream(keyword, length - 1);
78      i::Scanner scanner(&unicode_cache);
79      scanner.Initialize(&stream);
80      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
81      CHECK_EQ(i::Token::EOS, scanner.Next());
82    }
83    // Adding characters will make keyword matching fail.
84    static const char chars_to_append[] = { 'z', '0', '_' };
85    for (int j = 0; j < static_cast<int>(ARRAY_SIZE(chars_to_append)); ++j) {
86      i::OS::MemMove(buffer, keyword, length);
87      buffer[length] = chars_to_append[j];
88      i::Utf8ToUtf16CharacterStream stream(buffer, length + 1);
89      i::Scanner scanner(&unicode_cache);
90      scanner.Initialize(&stream);
91      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
92      CHECK_EQ(i::Token::EOS, scanner.Next());
93    }
94    // Replacing characters will make keyword matching fail.
95    {
96      i::OS::MemMove(buffer, keyword, length);
97      buffer[length - 1] = '_';
98      i::Utf8ToUtf16CharacterStream stream(buffer, length);
99      i::Scanner scanner(&unicode_cache);
100      scanner.Initialize(&stream);
101      CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
102      CHECK_EQ(i::Token::EOS, scanner.Next());
103    }
104  }
105}
106
107
108TEST(ScanHTMLEndComments) {
109  v8::V8::Initialize();
110  v8::Isolate* isolate = CcTest::isolate();
111
112  // Regression test. See:
113  //    http://code.google.com/p/chromium/issues/detail?id=53548
114  // Tests that --> is correctly interpreted as comment-to-end-of-line if there
115  // is only whitespace before it on the line (with comments considered as
116  // whitespace, even a multiline-comment containing a newline).
117  // This was not the case if it occurred before the first real token
118  // in the input.
119  const char* tests[] = {
120      // Before first real token.
121      "--> is eol-comment\nvar y = 37;\n",
122      "\n --> is eol-comment\nvar y = 37;\n",
123      "/* precomment */ --> is eol-comment\nvar y = 37;\n",
124      "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
125      // After first real token.
126      "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
127      "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
128      NULL
129  };
130
131  const char* fail_tests[] = {
132      "x --> is eol-comment\nvar y = 37;\n",
133      "\"\\n\" --> is eol-comment\nvar y = 37;\n",
134      "x/* precomment */ --> is eol-comment\nvar y = 37;\n",
135      "x/* precomment\n */ --> is eol-comment\nvar y = 37;\n",
136      "var x = 42; --> is eol-comment\nvar y = 37;\n",
137      "var x = 42; /* precomment\n */ --> is eol-comment\nvar y = 37;\n",
138      NULL
139  };
140
141  // Parser/Scanner needs a stack limit.
142  int marker;
143  CcTest::i_isolate()->stack_guard()->SetStackLimit(
144      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
145
146  for (int i = 0; tests[i]; i++) {
147    v8::ScriptData* data =
148        v8::ScriptData::PreCompile(isolate, tests[i], i::StrLength(tests[i]));
149    CHECK(data != NULL && !data->HasError());
150    delete data;
151  }
152
153  for (int i = 0; fail_tests[i]; i++) {
154    v8::ScriptData* data = v8::ScriptData::PreCompile(
155        isolate, fail_tests[i], i::StrLength(fail_tests[i]));
156    CHECK(data == NULL || data->HasError());
157    delete data;
158  }
159}
160
161
162class ScriptResource : public v8::String::ExternalAsciiStringResource {
163 public:
164  ScriptResource(const char* data, size_t length)
165      : data_(data), length_(length) { }
166
167  const char* data() const { return data_; }
168  size_t length() const { return length_; }
169
170 private:
171  const char* data_;
172  size_t length_;
173};
174
175
176TEST(Preparsing) {
177  v8::Isolate* isolate = CcTest::isolate();
178  v8::HandleScope handles(isolate);
179  v8::Local<v8::Context> context = v8::Context::New(isolate);
180  v8::Context::Scope context_scope(context);
181  int marker;
182  CcTest::i_isolate()->stack_guard()->SetStackLimit(
183      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
184
185  // Source containing functions that might be lazily compiled  and all types
186  // of symbols (string, propertyName, regexp).
187  const char* source =
188      "var x = 42;"
189      "function foo(a) { return function nolazy(b) { return a + b; } }"
190      "function bar(a) { if (a) return function lazy(b) { return b; } }"
191      "var z = {'string': 'string literal', bareword: 'propertyName', "
192      "         42: 'number literal', for: 'keyword as propertyName', "
193      "         f\\u006fr: 'keyword propertyname with escape'};"
194      "var v = /RegExp Literal/;"
195      "var w = /RegExp Literal\\u0020With Escape/gin;"
196      "var y = { get getter() { return 42; }, "
197      "          set setter(v) { this.value = v; }};";
198  int source_length = i::StrLength(source);
199  const char* error_source = "var x = y z;";
200  int error_source_length = i::StrLength(error_source);
201
202  v8::ScriptData* preparse =
203      v8::ScriptData::PreCompile(isolate, source, source_length);
204  CHECK(!preparse->HasError());
205  bool lazy_flag = i::FLAG_lazy;
206  {
207    i::FLAG_lazy = true;
208    ScriptResource* resource = new ScriptResource(source, source_length);
209    v8::Local<v8::String> script_source =
210        v8::String::NewExternal(isolate, resource);
211    v8::Script::Compile(script_source, NULL, preparse);
212  }
213
214  {
215    i::FLAG_lazy = false;
216
217    ScriptResource* resource = new ScriptResource(source, source_length);
218    v8::Local<v8::String> script_source =
219        v8::String::NewExternal(isolate, resource);
220    v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
221  }
222  delete preparse;
223  i::FLAG_lazy = lazy_flag;
224
225  // Syntax error.
226  v8::ScriptData* error_preparse =
227      v8::ScriptData::PreCompile(isolate, error_source, error_source_length);
228  CHECK(error_preparse->HasError());
229  i::ScriptDataImpl *pre_impl =
230      reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
231  i::Scanner::Location error_location =
232      pre_impl->MessageLocation();
233  // Error is at "z" in source, location 10..11.
234  CHECK_EQ(10, error_location.beg_pos);
235  CHECK_EQ(11, error_location.end_pos);
236  // Should not crash.
237  const char* message = pre_impl->BuildMessage();
238  pre_impl->BuildArgs();
239  CHECK_GT(strlen(message), 0);
240}
241
242
243TEST(StandAlonePreParser) {
244  v8::V8::Initialize();
245
246  int marker;
247  CcTest::i_isolate()->stack_guard()->SetStackLimit(
248      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
249
250  const char* programs[] = {
251      "{label: 42}",
252      "var x = 42;",
253      "function foo(x, y) { return x + y; }",
254      "%ArgleBargle(glop);",
255      "var x = new new Function('this.x = 42');",
256      NULL
257  };
258
259  uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
260  for (int i = 0; programs[i]; i++) {
261    const char* program = programs[i];
262    i::Utf8ToUtf16CharacterStream stream(
263        reinterpret_cast<const i::byte*>(program),
264        static_cast<unsigned>(strlen(program)));
265    i::CompleteParserRecorder log;
266    i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
267    scanner.Initialize(&stream);
268
269    i::PreParser preparser(&scanner, &log, stack_limit);
270    preparser.set_allow_lazy(true);
271    preparser.set_allow_natives_syntax(true);
272    i::PreParser::PreParseResult result = preparser.PreParseProgram();
273    CHECK_EQ(i::PreParser::kPreParseSuccess, result);
274    i::ScriptDataImpl data(log.ExtractData());
275    CHECK(!data.has_error());
276  }
277}
278
279
280TEST(StandAlonePreParserNoNatives) {
281  v8::V8::Initialize();
282
283  int marker;
284  CcTest::i_isolate()->stack_guard()->SetStackLimit(
285      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
286
287  const char* programs[] = {
288      "%ArgleBargle(glop);",
289      "var x = %_IsSmi(42);",
290      NULL
291  };
292
293  uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
294  for (int i = 0; programs[i]; i++) {
295    const char* program = programs[i];
296    i::Utf8ToUtf16CharacterStream stream(
297        reinterpret_cast<const i::byte*>(program),
298        static_cast<unsigned>(strlen(program)));
299    i::CompleteParserRecorder log;
300    i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
301    scanner.Initialize(&stream);
302
303    // Preparser defaults to disallowing natives syntax.
304    i::PreParser preparser(&scanner, &log, stack_limit);
305    preparser.set_allow_lazy(true);
306    i::PreParser::PreParseResult result = preparser.PreParseProgram();
307    CHECK_EQ(i::PreParser::kPreParseSuccess, result);
308    i::ScriptDataImpl data(log.ExtractData());
309    // Data contains syntax error.
310    CHECK(data.has_error());
311  }
312}
313
314
315TEST(RegressChromium62639) {
316  v8::V8::Initialize();
317  i::Isolate* isolate = CcTest::i_isolate();
318
319  int marker;
320  isolate->stack_guard()->SetStackLimit(
321      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
322
323  const char* program = "var x = 'something';\n"
324                        "escape: function() {}";
325  // Fails parsing expecting an identifier after "function".
326  // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
327  // and then used the invalid currently scanned literal. This always
328  // failed in debug mode, and sometimes crashed in release mode.
329
330  i::Utf8ToUtf16CharacterStream stream(
331      reinterpret_cast<const i::byte*>(program),
332      static_cast<unsigned>(strlen(program)));
333  i::ScriptDataImpl* data = i::PreParserApi::PreParse(isolate, &stream);
334  CHECK(data->HasError());
335  delete data;
336}
337
338
339TEST(Regress928) {
340  v8::V8::Initialize();
341  i::Isolate* isolate = CcTest::i_isolate();
342  i::Factory* factory = isolate->factory();
343
344  // Preparsing didn't consider the catch clause of a try statement
345  // as with-content, which made it assume that a function inside
346  // the block could be lazily compiled, and an extra, unexpected,
347  // entry was added to the data.
348  int marker;
349  isolate->stack_guard()->SetStackLimit(
350      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
351
352  const char* program =
353      "try { } catch (e) { var foo = function () { /* first */ } }"
354      "var bar = function () { /* second */ }";
355
356  v8::HandleScope handles(CcTest::isolate());
357  i::Handle<i::String> source(
358      factory->NewStringFromAscii(i::CStrVector(program)));
359  i::GenericStringUtf16CharacterStream stream(source, 0, source->length());
360  i::ScriptDataImpl* data = i::PreParserApi::PreParse(isolate, &stream);
361  CHECK(!data->HasError());
362
363  data->Initialize();
364
365  int first_function =
366      static_cast<int>(strstr(program, "function") - program);
367  int first_lbrace = first_function + i::StrLength("function () ");
368  CHECK_EQ('{', program[first_lbrace]);
369  i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
370  CHECK(!entry1.is_valid());
371
372  int second_function =
373      static_cast<int>(strstr(program + first_lbrace, "function") - program);
374  int second_lbrace =
375      second_function + i::StrLength("function () ");
376  CHECK_EQ('{', program[second_lbrace]);
377  i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
378  CHECK(entry2.is_valid());
379  CHECK_EQ('}', program[entry2.end_pos() - 1]);
380  delete data;
381}
382
383
384TEST(PreParseOverflow) {
385  v8::V8::Initialize();
386
387  int marker;
388  CcTest::i_isolate()->stack_guard()->SetStackLimit(
389      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
390
391  size_t kProgramSize = 1024 * 1024;
392  i::SmartArrayPointer<char> program(i::NewArray<char>(kProgramSize + 1));
393  memset(*program, '(', kProgramSize);
394  program[kProgramSize] = '\0';
395
396  uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
397
398  i::Utf8ToUtf16CharacterStream stream(
399      reinterpret_cast<const i::byte*>(*program),
400      static_cast<unsigned>(kProgramSize));
401  i::CompleteParserRecorder log;
402  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
403  scanner.Initialize(&stream);
404
405  i::PreParser preparser(&scanner, &log, stack_limit);
406  preparser.set_allow_lazy(true);
407  i::PreParser::PreParseResult result = preparser.PreParseProgram();
408  CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
409}
410
411
412class TestExternalResource: public v8::String::ExternalStringResource {
413 public:
414  explicit TestExternalResource(uint16_t* data, int length)
415      : data_(data), length_(static_cast<size_t>(length)) { }
416
417  ~TestExternalResource() { }
418
419  const uint16_t* data() const {
420    return data_;
421  }
422
423  size_t length() const {
424    return length_;
425  }
426 private:
427  uint16_t* data_;
428  size_t length_;
429};
430
431
432#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
433
434void TestCharacterStream(const char* ascii_source,
435                         unsigned length,
436                         unsigned start = 0,
437                         unsigned end = 0) {
438  if (end == 0) end = length;
439  unsigned sub_length = end - start;
440  i::Isolate* isolate = CcTest::i_isolate();
441  i::Factory* factory = isolate->factory();
442  i::HandleScope test_scope(isolate);
443  i::SmartArrayPointer<i::uc16> uc16_buffer(new i::uc16[length]);
444  for (unsigned i = 0; i < length; i++) {
445    uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
446  }
447  i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
448  i::Handle<i::String> ascii_string(
449      factory->NewStringFromAscii(ascii_vector));
450  TestExternalResource resource(*uc16_buffer, length);
451  i::Handle<i::String> uc16_string(
452      factory->NewExternalStringFromTwoByte(&resource));
453
454  i::ExternalTwoByteStringUtf16CharacterStream uc16_stream(
455      i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
456  i::GenericStringUtf16CharacterStream string_stream(ascii_string, start, end);
457  i::Utf8ToUtf16CharacterStream utf8_stream(
458      reinterpret_cast<const i::byte*>(ascii_source), end);
459  utf8_stream.SeekForward(start);
460
461  unsigned i = start;
462  while (i < end) {
463    // Read streams one char at a time
464    CHECK_EQU(i, uc16_stream.pos());
465    CHECK_EQU(i, string_stream.pos());
466    CHECK_EQU(i, utf8_stream.pos());
467    int32_t c0 = ascii_source[i];
468    int32_t c1 = uc16_stream.Advance();
469    int32_t c2 = string_stream.Advance();
470    int32_t c3 = utf8_stream.Advance();
471    i++;
472    CHECK_EQ(c0, c1);
473    CHECK_EQ(c0, c2);
474    CHECK_EQ(c0, c3);
475    CHECK_EQU(i, uc16_stream.pos());
476    CHECK_EQU(i, string_stream.pos());
477    CHECK_EQU(i, utf8_stream.pos());
478  }
479  while (i > start + sub_length / 4) {
480    // Pushback, re-read, pushback again.
481    int32_t c0 = ascii_source[i - 1];
482    CHECK_EQU(i, uc16_stream.pos());
483    CHECK_EQU(i, string_stream.pos());
484    CHECK_EQU(i, utf8_stream.pos());
485    uc16_stream.PushBack(c0);
486    string_stream.PushBack(c0);
487    utf8_stream.PushBack(c0);
488    i--;
489    CHECK_EQU(i, uc16_stream.pos());
490    CHECK_EQU(i, string_stream.pos());
491    CHECK_EQU(i, utf8_stream.pos());
492    int32_t c1 = uc16_stream.Advance();
493    int32_t c2 = string_stream.Advance();
494    int32_t c3 = utf8_stream.Advance();
495    i++;
496    CHECK_EQU(i, uc16_stream.pos());
497    CHECK_EQU(i, string_stream.pos());
498    CHECK_EQU(i, utf8_stream.pos());
499    CHECK_EQ(c0, c1);
500    CHECK_EQ(c0, c2);
501    CHECK_EQ(c0, c3);
502    uc16_stream.PushBack(c0);
503    string_stream.PushBack(c0);
504    utf8_stream.PushBack(c0);
505    i--;
506    CHECK_EQU(i, uc16_stream.pos());
507    CHECK_EQU(i, string_stream.pos());
508    CHECK_EQU(i, utf8_stream.pos());
509  }
510  unsigned halfway = start + sub_length / 2;
511  uc16_stream.SeekForward(halfway - i);
512  string_stream.SeekForward(halfway - i);
513  utf8_stream.SeekForward(halfway - i);
514  i = halfway;
515  CHECK_EQU(i, uc16_stream.pos());
516  CHECK_EQU(i, string_stream.pos());
517  CHECK_EQU(i, utf8_stream.pos());
518
519  while (i < end) {
520    // Read streams one char at a time
521    CHECK_EQU(i, uc16_stream.pos());
522    CHECK_EQU(i, string_stream.pos());
523    CHECK_EQU(i, utf8_stream.pos());
524    int32_t c0 = ascii_source[i];
525    int32_t c1 = uc16_stream.Advance();
526    int32_t c2 = string_stream.Advance();
527    int32_t c3 = utf8_stream.Advance();
528    i++;
529    CHECK_EQ(c0, c1);
530    CHECK_EQ(c0, c2);
531    CHECK_EQ(c0, c3);
532    CHECK_EQU(i, uc16_stream.pos());
533    CHECK_EQU(i, string_stream.pos());
534    CHECK_EQU(i, utf8_stream.pos());
535  }
536
537  int32_t c1 = uc16_stream.Advance();
538  int32_t c2 = string_stream.Advance();
539  int32_t c3 = utf8_stream.Advance();
540  CHECK_LT(c1, 0);
541  CHECK_LT(c2, 0);
542  CHECK_LT(c3, 0);
543}
544
545
546TEST(CharacterStreams) {
547  v8::Isolate* isolate = CcTest::isolate();
548  v8::HandleScope handles(isolate);
549  v8::Local<v8::Context> context = v8::Context::New(isolate);
550  v8::Context::Scope context_scope(context);
551
552  TestCharacterStream("abc\0\n\r\x7f", 7);
553  static const unsigned kBigStringSize = 4096;
554  char buffer[kBigStringSize + 1];
555  for (unsigned i = 0; i < kBigStringSize; i++) {
556    buffer[i] = static_cast<char>(i & 0x7f);
557  }
558  TestCharacterStream(buffer, kBigStringSize);
559
560  TestCharacterStream(buffer, kBigStringSize, 576, 3298);
561
562  TestCharacterStream("\0", 1);
563  TestCharacterStream("", 0);
564}
565
566
567TEST(Utf8CharacterStream) {
568  static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
569  static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
570
571  static const int kAllUtf8CharsSize =
572      (unibrow::Utf8::kMaxOneByteChar + 1) +
573      (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
574      (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
575  static const unsigned kAllUtf8CharsSizeU =
576      static_cast<unsigned>(kAllUtf8CharsSize);
577
578  char buffer[kAllUtf8CharsSizeU];
579  unsigned cursor = 0;
580  for (int i = 0; i <= kMaxUC16Char; i++) {
581    cursor += unibrow::Utf8::Encode(buffer + cursor,
582                                    i,
583                                    unibrow::Utf16::kNoPreviousCharacter);
584  }
585  ASSERT(cursor == kAllUtf8CharsSizeU);
586
587  i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
588                                       kAllUtf8CharsSizeU);
589  for (int i = 0; i <= kMaxUC16Char; i++) {
590    CHECK_EQU(i, stream.pos());
591    int32_t c = stream.Advance();
592    CHECK_EQ(i, c);
593    CHECK_EQU(i + 1, stream.pos());
594  }
595  for (int i = kMaxUC16Char; i >= 0; i--) {
596    CHECK_EQU(i + 1, stream.pos());
597    stream.PushBack(i);
598    CHECK_EQU(i, stream.pos());
599  }
600  int i = 0;
601  while (stream.pos() < kMaxUC16CharU) {
602    CHECK_EQU(i, stream.pos());
603    unsigned progress = stream.SeekForward(12);
604    i += progress;
605    int32_t c = stream.Advance();
606    if (i <= kMaxUC16Char) {
607      CHECK_EQ(i, c);
608    } else {
609      CHECK_EQ(-1, c);
610    }
611    i += 1;
612    CHECK_EQU(i, stream.pos());
613  }
614}
615
616#undef CHECK_EQU
617
618void TestStreamScanner(i::Utf16CharacterStream* stream,
619                       i::Token::Value* expected_tokens,
620                       int skip_pos = 0,  // Zero means not skipping.
621                       int skip_to = 0) {
622  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
623  scanner.Initialize(stream);
624
625  int i = 0;
626  do {
627    i::Token::Value expected = expected_tokens[i];
628    i::Token::Value actual = scanner.Next();
629    CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
630    if (scanner.location().end_pos == skip_pos) {
631      scanner.SeekForward(skip_to);
632    }
633    i++;
634  } while (expected_tokens[i] != i::Token::ILLEGAL);
635}
636
637
638TEST(StreamScanner) {
639  v8::V8::Initialize();
640
641  const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
642  i::Utf8ToUtf16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
643                                        static_cast<unsigned>(strlen(str1)));
644  i::Token::Value expectations1[] = {
645      i::Token::LBRACE,
646      i::Token::IDENTIFIER,
647      i::Token::IDENTIFIER,
648      i::Token::FOR,
649      i::Token::COLON,
650      i::Token::MUL,
651      i::Token::DIV,
652      i::Token::LT,
653      i::Token::SUB,
654      i::Token::IDENTIFIER,
655      i::Token::EOS,
656      i::Token::ILLEGAL
657  };
658  TestStreamScanner(&stream1, expectations1, 0, 0);
659
660  const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
661  i::Utf8ToUtf16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
662                                        static_cast<unsigned>(strlen(str2)));
663  i::Token::Value expectations2[] = {
664      i::Token::CASE,
665      i::Token::DEFAULT,
666      i::Token::CONST,
667      i::Token::LBRACE,
668      // Skipped part here
669      i::Token::RBRACE,
670      i::Token::DO,
671      i::Token::EOS,
672      i::Token::ILLEGAL
673  };
674  ASSERT_EQ('{', str2[19]);
675  ASSERT_EQ('}', str2[37]);
676  TestStreamScanner(&stream2, expectations2, 20, 37);
677
678  const char* str3 = "{}}}}";
679  i::Token::Value expectations3[] = {
680      i::Token::LBRACE,
681      i::Token::RBRACE,
682      i::Token::RBRACE,
683      i::Token::RBRACE,
684      i::Token::RBRACE,
685      i::Token::EOS,
686      i::Token::ILLEGAL
687  };
688  // Skip zero-four RBRACEs.
689  for (int i = 0; i <= 4; i++) {
690     expectations3[6 - i] = i::Token::ILLEGAL;
691     expectations3[5 - i] = i::Token::EOS;
692     i::Utf8ToUtf16CharacterStream stream3(
693         reinterpret_cast<const i::byte*>(str3),
694         static_cast<unsigned>(strlen(str3)));
695     TestStreamScanner(&stream3, expectations3, 1, 1 + i);
696  }
697}
698
699
700void TestScanRegExp(const char* re_source, const char* expected) {
701  i::Utf8ToUtf16CharacterStream stream(
702       reinterpret_cast<const i::byte*>(re_source),
703       static_cast<unsigned>(strlen(re_source)));
704  i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
705  scanner.Initialize(&stream);
706
707  i::Token::Value start = scanner.peek();
708  CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
709  CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
710  scanner.Next();  // Current token is now the regexp literal.
711  CHECK(scanner.is_literal_ascii());
712  i::Vector<const char> actual = scanner.literal_ascii_string();
713  for (int i = 0; i < actual.length(); i++) {
714    CHECK_NE('\0', expected[i]);
715    CHECK_EQ(expected[i], actual[i]);
716  }
717}
718
719
720TEST(RegExpScanning) {
721  v8::V8::Initialize();
722
723  // RegExp token with added garbage at the end. The scanner should only
724  // scan the RegExp until the terminating slash just before "flipperwald".
725  TestScanRegExp("/b/flipperwald", "b");
726  // Incomplete escape sequences doesn't hide the terminating slash.
727  TestScanRegExp("/\\x/flipperwald", "\\x");
728  TestScanRegExp("/\\u/flipperwald", "\\u");
729  TestScanRegExp("/\\u1/flipperwald", "\\u1");
730  TestScanRegExp("/\\u12/flipperwald", "\\u12");
731  TestScanRegExp("/\\u123/flipperwald", "\\u123");
732  TestScanRegExp("/\\c/flipperwald", "\\c");
733  TestScanRegExp("/\\c//flipperwald", "\\c");
734  // Slashes inside character classes are not terminating.
735  TestScanRegExp("/[/]/flipperwald", "[/]");
736  TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
737  // Incomplete escape sequences inside a character class doesn't hide
738  // the end of the character class.
739  TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
740  TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
741  TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
742  TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
743  TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
744  TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
745  TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
746  TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
747  // Escaped ']'s wont end the character class.
748  TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
749  // Escaped slashes are not terminating.
750  TestScanRegExp("/\\//flipperwald", "\\/");
751  // Starting with '=' works too.
752  TestScanRegExp("/=/", "=");
753  TestScanRegExp("/=?/", "=?");
754}
755
756
// Returns the number of UTF-16 code units expected for the NUL-terminated
// byte string |s| when it is read as UTF-8.
//
// The count starts at one code unit per byte and is reduced for every
// well-formed multi-byte sequence:
//   2 bytes -> 1 code unit, 3 bytes -> 1 code unit, 4 bytes -> 2 code units.
// Stray continuation bytes, 5/6-byte lead bytes, overlong encodings and
// sequences with a bad continuation byte keep one code unit (kBadChar) for
// the offending byte.
static int Utf8LengthHelper(const char* s) {
  int len = static_cast<int>(strlen(s));
  int character_length = len;
  for (int i = 0; i < len; i++) {
    unsigned char c = s[i];
    int input_offset = 0;   // Continuation bytes that follow the lead byte.
    int output_adjust = 0;  // Code units saved if the sequence is valid.
    if (c > 0x7f) {
      // A stray continuation byte counts as one kBadChar.
      if (c < 0xc0) continue;
      if (c >= 0xf0) {
        if (c >= 0xf8) {
          // 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8
          // byte.
          continue;  // Handle first UTF-8 byte.
        }
        if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) {
          // This 4 byte sequence could have been coded as a 3 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 3;
        // 4 bytes of UTF-8 turn into 2 UTF-16 code units. The adjustment is
        // deferred until the continuation bytes have been validated, exactly
        // as for the shorter sequences, so a malformed sequence is not
        // under-counted.
        output_adjust = 2;
      } else if (c >= 0xe0) {
        if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
          // This 3 byte sequence could have been coded as a 2 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 2;
        // 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
        output_adjust = 2;
      } else {
        if ((c & 0x1e) == 0) {
          // This 2 byte sequence could have been coded as a 1 byte sequence.
          // Record a single kBadChar for the first byte and continue.
          continue;
        }
        input_offset = 1;
        // 2 bytes of UTF-8 turn into 1 UTF-16 code unit.
        output_adjust = 1;
      }
      // The terminating NUL is not a continuation byte, so this scan stops
      // at the end of the string at the latest.
      bool bad = false;
      for (int j = 1; j <= input_offset; j++) {
        if ((s[i + j] & 0xc0) != 0x80) {
          // Bad UTF-8 sequence turns the first in the sequence into kBadChar,
          // which is a single UTF-16 code unit.
          bad = true;
          break;
        }
      }
      if (!bad) {
        i += input_offset;
        character_length -= output_adjust;
      }
    }
  }
  return character_length;
}
816
817
818TEST(ScopePositions) {
819  // Test the parser for correctly setting the start and end positions
820  // of a scope. We check the scope positions of exactly one scope
821  // nested in the global scope of a program. 'inner source' is the
822  // source code that determines the part of the source belonging
823  // to the nested scope. 'outer_prefix' and 'outer_suffix' are
824  // parts of the source that belong to the global scope.
825  struct SourceData {
826    const char* outer_prefix;
827    const char* inner_source;
828    const char* outer_suffix;
829    i::ScopeType scope_type;
830    i::LanguageMode language_mode;
831  };
832
833  const SourceData source_data[] = {
834    { "  with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::CLASSIC_MODE },
835    { "  with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::CLASSIC_MODE },
836    { "  with ({}) ", "{\n"
837      "    block;\n"
838      "  }", "\n"
839      "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
840    { "  with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::CLASSIC_MODE },
841    { "  with ({}) ", "statement", "\n"
842      "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
843    { "  with ({})\n"
844      "    ", "statement;", "\n"
845      "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
846    { "  try {} catch ", "(e) { block; }", " more;",
847      i::CATCH_SCOPE, i::CLASSIC_MODE },
848    { "  try {} catch ", "(e) { block; }", "; more;",
849      i::CATCH_SCOPE, i::CLASSIC_MODE },
850    { "  try {} catch ", "(e) {\n"
851      "    block;\n"
852      "  }", "\n"
853      "  more;", i::CATCH_SCOPE, i::CLASSIC_MODE },
854    { "  try {} catch ", "(e) { block; }", " finally { block; } more;",
855      i::CATCH_SCOPE, i::CLASSIC_MODE },
856    { "  start;\n"
857      "  ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
858    { "  start;\n"
859      "  ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
860    { "  start;\n"
861      "  ", "{\n"
862      "    let block;\n"
863      "  }", "\n"
864      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
865    { "  start;\n"
866      "  function fun", "(a,b) { infunction; }", " more;",
867      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
868    { "  start;\n"
869      "  function fun", "(a,b) {\n"
870      "    infunction;\n"
871      "  }", "\n"
872      "  more;", i::FUNCTION_SCOPE, i::CLASSIC_MODE },
873    { "  (function fun", "(a,b) { infunction; }", ")();",
874      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
875    { "  for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;",
876      i::BLOCK_SCOPE, i::EXTENDED_MODE },
877    { "  for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;",
878      i::BLOCK_SCOPE, i::EXTENDED_MODE },
879    { "  for ", "(let x = 1 ; x < 10; ++ x) {\n"
880      "    block;\n"
881      "  }", "\n"
882      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
883    { "  for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;",
884      i::BLOCK_SCOPE, i::EXTENDED_MODE },
885    { "  for ", "(let x = 1 ; x < 10; ++ x) statement", "\n"
886      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
887    { "  for ", "(let x = 1 ; x < 10; ++ x)\n"
888      "    statement;", "\n"
889      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
890    { "  for ", "(let x in {}) { block; }", " more;",
891      i::BLOCK_SCOPE, i::EXTENDED_MODE },
892    { "  for ", "(let x in {}) { block; }", "; more;",
893      i::BLOCK_SCOPE, i::EXTENDED_MODE },
894    { "  for ", "(let x in {}) {\n"
895      "    block;\n"
896      "  }", "\n"
897      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
898    { "  for ", "(let x in {}) statement;", " more;",
899      i::BLOCK_SCOPE, i::EXTENDED_MODE },
900    { "  for ", "(let x in {}) statement", "\n"
901      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
902    { "  for ", "(let x in {})\n"
903      "    statement;", "\n"
904      "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
905    // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
906    // the preparser off in terms of byte offsets.
907    // 6 byte encoding.
908    { "  'foo\355\240\201\355\260\211';\n"
909      "  (function fun", "(a,b) { infunction; }", ")();",
910      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
911    // 4 byte encoding.
912    { "  'foo\360\220\220\212';\n"
913      "  (function fun", "(a,b) { infunction; }", ")();",
914      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
915    // 3 byte encoding of \u0fff.
916    { "  'foo\340\277\277';\n"
917      "  (function fun", "(a,b) { infunction; }", ")();",
918      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
919    // Broken 6 byte encoding with missing last byte.
920    { "  'foo\355\240\201\355\211';\n"
921      "  (function fun", "(a,b) { infunction; }", ")();",
922      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
923    // Broken 3 byte encoding of \u0fff with missing last byte.
924    { "  'foo\340\277';\n"
925      "  (function fun", "(a,b) { infunction; }", ")();",
926      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
927    // Broken 3 byte encoding of \u0fff with missing 2 last bytes.
928    { "  'foo\340';\n"
929      "  (function fun", "(a,b) { infunction; }", ")();",
930      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
931    // Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
932    { "  'foo\340\203\277';\n"
933      "  (function fun", "(a,b) { infunction; }", ")();",
934      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
935    // Broken 3 byte encoding of \u007f should be a 2 byte encoding.
936    { "  'foo\340\201\277';\n"
937      "  (function fun", "(a,b) { infunction; }", ")();",
938      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
939    // Unpaired lead surrogate.
940    { "  'foo\355\240\201';\n"
941      "  (function fun", "(a,b) { infunction; }", ")();",
942      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
943    // Unpaired lead surrogate where following code point is a 3 byte sequence.
944    { "  'foo\355\240\201\340\277\277';\n"
945      "  (function fun", "(a,b) { infunction; }", ")();",
946      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
947    // Unpaired lead surrogate where following code point is a 4 byte encoding
948    // of a trail surrogate.
949    { "  'foo\355\240\201\360\215\260\211';\n"
950      "  (function fun", "(a,b) { infunction; }", ")();",
951      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
952    // Unpaired trail surrogate.
953    { "  'foo\355\260\211';\n"
954      "  (function fun", "(a,b) { infunction; }", ")();",
955      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
956    // 2 byte encoding of \u00ff.
957    { "  'foo\303\277';\n"
958      "  (function fun", "(a,b) { infunction; }", ")();",
959      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
960    // Broken 2 byte encoding of \u00ff with missing last byte.
961    { "  'foo\303';\n"
962      "  (function fun", "(a,b) { infunction; }", ")();",
963      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
964    // Broken 2 byte encoding of \u007f should be a 1 byte encoding.
965    { "  'foo\301\277';\n"
966      "  (function fun", "(a,b) { infunction; }", ")();",
967      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
968    // Illegal 5 byte encoding.
969    { "  'foo\370\277\277\277\277';\n"
970      "  (function fun", "(a,b) { infunction; }", ")();",
971      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
972    // Illegal 6 byte encoding.
973    { "  'foo\374\277\277\277\277\277';\n"
974      "  (function fun", "(a,b) { infunction; }", ")();",
975      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
976    // Illegal 0xfe byte
977    { "  'foo\376\277\277\277\277\277\277';\n"
978      "  (function fun", "(a,b) { infunction; }", ")();",
979      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
980    // Illegal 0xff byte
981    { "  'foo\377\277\277\277\277\277\277\277';\n"
982      "  (function fun", "(a,b) { infunction; }", ")();",
983      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
984    { "  'foo';\n"
985      "  (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",
986      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
987    { "  'foo';\n"
988      "  (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",
989      i::FUNCTION_SCOPE, i::CLASSIC_MODE },
990    { NULL, NULL, NULL, i::EVAL_SCOPE, i::CLASSIC_MODE }
991  };
992
993  i::Isolate* isolate = CcTest::i_isolate();
994  i::Factory* factory = isolate->factory();
995
996  v8::HandleScope handles(CcTest::isolate());
997  v8::Handle<v8::Context> context = v8::Context::New(CcTest::isolate());
998  v8::Context::Scope context_scope(context);
999
1000  int marker;
1001  isolate->stack_guard()->SetStackLimit(
1002      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
1003
1004  for (int i = 0; source_data[i].outer_prefix; i++) {
1005    int kPrefixLen = Utf8LengthHelper(source_data[i].outer_prefix);
1006    int kInnerLen = Utf8LengthHelper(source_data[i].inner_source);
1007    int kSuffixLen = Utf8LengthHelper(source_data[i].outer_suffix);
1008    int kPrefixByteLen = i::StrLength(source_data[i].outer_prefix);
1009    int kInnerByteLen = i::StrLength(source_data[i].inner_source);
1010    int kSuffixByteLen = i::StrLength(source_data[i].outer_suffix);
1011    int kProgramSize = kPrefixLen + kInnerLen + kSuffixLen;
1012    int kProgramByteSize = kPrefixByteLen + kInnerByteLen + kSuffixByteLen;
1013    i::Vector<char> program = i::Vector<char>::New(kProgramByteSize + 1);
1014    i::OS::SNPrintF(program, "%s%s%s",
1015                             source_data[i].outer_prefix,
1016                             source_data[i].inner_source,
1017                             source_data[i].outer_suffix);
1018
1019    // Parse program source.
1020    i::Handle<i::String> source(
1021        factory->NewStringFromUtf8(i::CStrVector(program.start())));
1022    CHECK_EQ(source->length(), kProgramSize);
1023    i::Handle<i::Script> script = factory->NewScript(source);
1024    i::CompilationInfoWithZone info(script);
1025    i::Parser parser(&info);
1026    parser.set_allow_lazy(true);
1027    parser.set_allow_harmony_scoping(true);
1028    info.MarkAsGlobal();
1029    info.SetLanguageMode(source_data[i].language_mode);
1030    parser.Parse();
1031    CHECK(info.function() != NULL);
1032
1033    // Check scope types and positions.
1034    i::Scope* scope = info.function()->scope();
1035    CHECK(scope->is_global_scope());
1036    CHECK_EQ(scope->start_position(), 0);
1037    CHECK_EQ(scope->end_position(), kProgramSize);
1038    CHECK_EQ(scope->inner_scopes()->length(), 1);
1039
1040    i::Scope* inner_scope = scope->inner_scopes()->at(0);
1041    CHECK_EQ(inner_scope->scope_type(), source_data[i].scope_type);
1042    CHECK_EQ(inner_scope->start_position(), kPrefixLen);
1043    // The end position of a token is one position after the last
1044    // character belonging to that token.
1045    CHECK_EQ(inner_scope->end_position(), kPrefixLen + kInnerLen);
1046  }
1047}
1048
1049
1050i::Handle<i::String> FormatMessage(i::ScriptDataImpl* data) {
1051  i::Isolate* isolate = CcTest::i_isolate();
1052  i::Factory* factory = isolate->factory();
1053  const char* message = data->BuildMessage();
1054  i::Handle<i::String> format = v8::Utils::OpenHandle(
1055      *v8::String::NewFromUtf8(CcTest::isolate(), message));
1056  i::Vector<const char*> args = data->BuildArgs();
1057  i::Handle<i::JSArray> args_array = factory->NewJSArray(args.length());
1058  for (int i = 0; i < args.length(); i++) {
1059    i::JSArray::SetElement(
1060        args_array, i, v8::Utils::OpenHandle(*v8::String::NewFromUtf8(
1061                                                  CcTest::isolate(), args[i])),
1062        NONE, i::kNonStrictMode);
1063  }
1064  i::Handle<i::JSObject> builtins(isolate->js_builtins_object());
1065  i::Handle<i::Object> format_fun =
1066      i::GetProperty(builtins, "FormatMessage");
1067  i::Handle<i::Object> arg_handles[] = { format, args_array };
1068  bool has_exception = false;
1069  i::Handle<i::Object> result = i::Execution::Call(
1070      isolate, format_fun, builtins, 2, arg_handles, &has_exception);
1071  CHECK(!has_exception);
1072  CHECK(result->IsString());
1073  for (int i = 0; i < args.length(); i++) {
1074    i::DeleteArray(args[i]);
1075  }
1076  i::DeleteArray(args.start());
1077  i::DeleteArray(message);
1078  return i::Handle<i::String>::cast(result);
1079}
1080
1081
// Feature switches that can be toggled on a parser or preparser. The values
// are consecutive starting at zero; sets of them are passed around as
// i::EnumSet<ParserFlag>.
enum ParserFlag {
  kAllowLazy = 0,
  kAllowNativesSyntax = 1,
  kAllowHarmonyScoping = 2,
  kAllowModules = 3,
  kAllowGenerators = 4,
  kAllowForOf = 5,
  kAllowHarmonyNumericLiterals = 6
};
1091
1092
1093void SetParserFlags(i::ParserBase* parser, i::EnumSet<ParserFlag> flags) {
1094  parser->set_allow_lazy(flags.Contains(kAllowLazy));
1095  parser->set_allow_natives_syntax(flags.Contains(kAllowNativesSyntax));
1096  parser->set_allow_harmony_scoping(flags.Contains(kAllowHarmonyScoping));
1097  parser->set_allow_modules(flags.Contains(kAllowModules));
1098  parser->set_allow_generators(flags.Contains(kAllowGenerators));
1099  parser->set_allow_for_of(flags.Contains(kAllowForOf));
1100  parser->set_allow_harmony_numeric_literals(
1101      flags.Contains(kAllowHarmonyNumericLiterals));
1102}
1103
1104
1105void TestParserSyncWithFlags(i::Handle<i::String> source,
1106                             i::EnumSet<ParserFlag> flags) {
1107  i::Isolate* isolate = CcTest::i_isolate();
1108  i::Factory* factory = isolate->factory();
1109
1110  uintptr_t stack_limit = isolate->stack_guard()->real_climit();
1111
1112  // Preparse the data.
1113  i::CompleteParserRecorder log;
1114  {
1115    i::Scanner scanner(isolate->unicode_cache());
1116    i::GenericStringUtf16CharacterStream stream(source, 0, source->length());
1117    i::PreParser preparser(&scanner, &log, stack_limit);
1118    SetParserFlags(&preparser, flags);
1119    scanner.Initialize(&stream);
1120    i::PreParser::PreParseResult result = preparser.PreParseProgram();
1121    CHECK_EQ(i::PreParser::kPreParseSuccess, result);
1122  }
1123  i::ScriptDataImpl data(log.ExtractData());
1124
1125  // Parse the data
1126  i::FunctionLiteral* function;
1127  {
1128    i::Handle<i::Script> script = factory->NewScript(source);
1129    i::CompilationInfoWithZone info(script);
1130    i::Parser parser(&info);
1131    SetParserFlags(&parser, flags);
1132    info.MarkAsGlobal();
1133    parser.Parse();
1134    function = info.function();
1135  }
1136
1137  // Check that preparsing fails iff parsing fails.
1138  if (function == NULL) {
1139    // Extract exception from the parser.
1140    CHECK(isolate->has_pending_exception());
1141    i::MaybeObject* maybe_object = isolate->pending_exception();
1142    i::JSObject* exception = NULL;
1143    CHECK(maybe_object->To(&exception));
1144    i::Handle<i::JSObject> exception_handle(exception);
1145    i::Handle<i::String> message_string =
1146        i::Handle<i::String>::cast(i::GetProperty(exception_handle, "message"));
1147
1148    if (!data.has_error()) {
1149      i::OS::Print(
1150          "Parser failed on:\n"
1151          "\t%s\n"
1152          "with error:\n"
1153          "\t%s\n"
1154          "However, the preparser succeeded",
1155          *source->ToCString(), *message_string->ToCString());
1156      CHECK(false);
1157    }
1158    // Check that preparser and parser produce the same error.
1159    i::Handle<i::String> preparser_message = FormatMessage(&data);
1160    if (!message_string->Equals(*preparser_message)) {
1161      i::OS::Print(
1162          "Expected parser and preparser to produce the same error on:\n"
1163          "\t%s\n"
1164          "However, found the following error messages\n"
1165          "\tparser:    %s\n"
1166          "\tpreparser: %s\n",
1167          *source->ToCString(),
1168          *message_string->ToCString(),
1169          *preparser_message->ToCString());
1170      CHECK(false);
1171    }
1172  } else if (data.has_error()) {
1173    i::OS::Print(
1174        "Preparser failed on:\n"
1175        "\t%s\n"
1176        "with error:\n"
1177        "\t%s\n"
1178        "However, the parser succeeded",
1179        *source->ToCString(), *FormatMessage(&data)->ToCString());
1180    CHECK(false);
1181  }
1182}
1183
1184
1185void TestParserSync(const char* source,
1186                    const ParserFlag* flag_list,
1187                    size_t flag_list_length) {
1188  i::Handle<i::String> str =
1189      CcTest::i_isolate()->factory()->NewStringFromAscii(i::CStrVector(source));
1190  for (int bits = 0; bits < (1 << flag_list_length); bits++) {
1191    i::EnumSet<ParserFlag> flags;
1192    for (size_t flag_index = 0; flag_index < flag_list_length; flag_index++) {
1193      if ((bits & (1 << flag_index)) != 0) flags.Add(flag_list[flag_index]);
1194    }
1195    TestParserSyncWithFlags(str, flags);
1196  }
1197}
1198
1199
1200TEST(ParserSync) {
1201  const char* context_data[][2] = {
1202    { "", "" },
1203    { "{", "}" },
1204    { "if (true) ", " else {}" },
1205    { "if (true) {} else ", "" },
1206    { "if (true) ", "" },
1207    { "do ", " while (false)" },
1208    { "while (false) ", "" },
1209    { "for (;;) ", "" },
1210    { "with ({})", "" },
1211    { "switch (12) { case 12: ", "}" },
1212    { "switch (12) { default: ", "}" },
1213    { "switch (12) { ", "case 12: }" },
1214    { "label2: ", "" },
1215    { NULL, NULL }
1216  };
1217
1218  const char* statement_data[] = {
1219    "{}",
1220    "var x",
1221    "var x = 1",
1222    "const x",
1223    "const x = 1",
1224    ";",
1225    "12",
1226    "if (false) {} else ;",
1227    "if (false) {} else {}",
1228    "if (false) {} else 12",
1229    "if (false) ;"
1230    "if (false) {}",
1231    "if (false) 12",
1232    "do {} while (false)",
1233    "for (;;) ;",
1234    "for (;;) {}",
1235    "for (;;) 12",
1236    "continue",
1237    "continue label",
1238    "continue\nlabel",
1239    "break",
1240    "break label",
1241    "break\nlabel",
1242    "return",
1243    "return  12",
1244    "return\n12",
1245    "with ({}) ;",
1246    "with ({}) {}",
1247    "with ({}) 12",
1248    "switch ({}) { default: }"
1249    "label3: "
1250    "throw",
1251    "throw  12",
1252    "throw\n12",
1253    "try {} catch(e) {}",
1254    "try {} finally {}",
1255    "try {} catch(e) {} finally {}",
1256    "debugger",
1257    NULL
1258  };
1259
1260  const char* termination_data[] = {
1261    "",
1262    ";",
1263    "\n",
1264    ";\n",
1265    "\n;",
1266    NULL
1267  };
1268
1269  v8::HandleScope handles(CcTest::isolate());
1270  v8::Handle<v8::Context> context = v8::Context::New(CcTest::isolate());
1271  v8::Context::Scope context_scope(context);
1272
1273  int marker;
1274  CcTest::i_isolate()->stack_guard()->SetStackLimit(
1275      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
1276
1277  static const ParserFlag flags1[] = {
1278    kAllowLazy, kAllowHarmonyScoping, kAllowModules, kAllowGenerators,
1279    kAllowForOf
1280  };
1281  for (int i = 0; context_data[i][0] != NULL; ++i) {
1282    for (int j = 0; statement_data[j] != NULL; ++j) {
1283      for (int k = 0; termination_data[k] != NULL; ++k) {
1284        int kPrefixLen = i::StrLength(context_data[i][0]);
1285        int kStatementLen = i::StrLength(statement_data[j]);
1286        int kTerminationLen = i::StrLength(termination_data[k]);
1287        int kSuffixLen = i::StrLength(context_data[i][1]);
1288        int kProgramSize = kPrefixLen + kStatementLen + kTerminationLen
1289            + kSuffixLen + i::StrLength("label: for (;;) {  }");
1290
1291        // Plug the source code pieces together.
1292        i::ScopedVector<char> program(kProgramSize + 1);
1293        int length = i::OS::SNPrintF(program,
1294            "label: for (;;) { %s%s%s%s }",
1295            context_data[i][0],
1296            statement_data[j],
1297            termination_data[k],
1298            context_data[i][1]);
1299        CHECK(length == kProgramSize);
1300        TestParserSync(program.start(), flags1, ARRAY_SIZE(flags1));
1301      }
1302    }
1303  }
1304
1305  // Neither Harmony numeric literals nor our natives syntax have any
1306  // interaction with the flags above, so test these separately to reduce
1307  // the combinatorial explosion.
1308  static const ParserFlag flags2[] = { kAllowHarmonyNumericLiterals };
1309  TestParserSync("0o1234", flags2, ARRAY_SIZE(flags2));
1310  TestParserSync("0b1011", flags2, ARRAY_SIZE(flags2));
1311
1312  static const ParserFlag flags3[] = { kAllowNativesSyntax };
1313  TestParserSync("%DebugPrint(123)", flags3, ARRAY_SIZE(flags3));
1314}
1315
1316
1317TEST(PreparserStrictOctal) {
1318  // Test that syntax error caused by octal literal is reported correctly as
1319  // such (issue 2220).
1320  v8::internal::FLAG_min_preparse_length = 1;  // Force preparsing.
1321  v8::V8::Initialize();
1322  v8::HandleScope scope(CcTest::isolate());
1323  v8::Context::Scope context_scope(
1324      v8::Context::New(CcTest::isolate()));
1325  v8::TryCatch try_catch;
1326  const char* script =
1327      "\"use strict\";       \n"
1328      "a = function() {      \n"
1329      "  b = function() {    \n"
1330      "    01;               \n"
1331      "  };                  \n"
1332      "};                    \n";
1333  v8::Script::Compile(v8::String::NewFromUtf8(CcTest::isolate(), script));
1334  CHECK(try_catch.HasCaught());
1335  v8::String::Utf8Value exception(try_catch.Exception());
1336  CHECK_EQ("SyntaxError: Octal literals are not allowed in strict mode.",
1337           *exception);
1338}
1339