// regexp.js revision 791712a13f1814dd3ab5d1a5ab8ff5dbc476f6d6
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Splits `str` on `regex` and asserts that the pieces, re-joined with ":",
// spell "foo:bar:baz".
//
//   str   - input string of the form "foo<sep>bar<sep>baz".
//   regex - pattern expected to match every separator in `str`.
//
// Failure is reported through the harness's assertEquals(expected, actual).
function testEscape(str, regex) {
  assertEquals("foo:bar:baz", str.split(regex).join(":"));
}

// Each call writes the separator as a different kind of regexp escape:
// a control escape, a character-class escape (twice) and a \uXXXX escape.
testEscape("foo\nbar\nbaz", /\n/);
testEscape("foo bar baz", /\s/);
testEscape("foo\tbar\tbaz", /\s/);
testEscape("foo-bar-baz", /\u002D/);

// Test containing null char in regexp.
// The pattern text is "[", U+0000, "]", i.e. a character class whose only
// member is the NUL character; it is matched against that same string.
var s = '[' + String.fromCharCode(0) + ']';
var re = new RegExp(s);
assertEquals(s.match(re).length, 1);
assertEquals(s.match(re)[0], String.fromCharCode(0));

// Test strings containing all line separators
// The subject uses \n, \r, \r\n, U+2028 and U+2029 as terminators. Each
// "line" starts with a lower-case letter and ends with the matching
// upper-case letter, which is what the expectations below rely on.
s = 'aA\nbB\rcC\r\ndD\u2028eE\u2029fF';
re = /^./gm; // any non-newline character at the beginning of a line
var result = s.match(re);
assertEquals(result.length, 6);
assertEquals(result[0], 'a');
assertEquals(result[1], 'b');
assertEquals(result[2], 'c');
assertEquals(result[3], 'd');
assertEquals(result[4], 'e');
assertEquals(result[5], 'f');

re = /.$/gm; // any non-newline character at the end of a line
result = s.match(re);
assertEquals(result.length, 6);
assertEquals(result[0], 'A');
assertEquals(result[1], 'B');
assertEquals(result[2], 'C');
assertEquals(result[3], 'D');
assertEquals(result[4], 'E');
assertEquals(result[5], 'F');

// With [^] the "\r\n" pair contributes one extra match: the "\n" sits at the
// start of the (empty) line that follows the "\r".
re = /^[^]/gm; // *any* character at the beginning of a line
result = s.match(re);
assertEquals(result.length, 7);
assertEquals(result[0], 'a');
assertEquals(result[1], 'b');
assertEquals(result[2], 'c');
assertEquals(result[3], '\n');
assertEquals(result[4], 'd');
assertEquals(result[5], 'e');
assertEquals(result[6], 'f');

// Symmetrically, the "\r" of "\r\n" is the last character before a line
// terminator and is reported as an end-of-line match.
re = /[^]$/gm; // *any* character at the end of a line
result = s.match(re);
assertEquals(result.length, 7);
assertEquals(result[0], 'A');
assertEquals(result[1], 'B');
assertEquals(result[2], 'C');
assertEquals(result[3], '\r');
assertEquals(result[4], 'D');
assertEquals(result[5], 'E');
assertEquals(result[6], 'F');

// Some tests from the Mozilla tests, where our behavior differs from
// SpiderMonkey.
// From ecma_3/RegExp/regress-334158.js
assertTrue(/\ca/.test( "\x01" ));
assertFalse(/\ca/.test( "\\ca" ));
// Passes in KJS, fails in IrregularExpressions.
// See http://code.google.com/p/v8/issues/detail?id=152
//assertTrue(/\c[a/]/.test( "\x1ba/]" ));


// Test \c in character class
// [\cM] must match only the control character (carriage return), not any of
// the literal characters that make up the escape.
re = /^[\cM]$/;
assertTrue(re.test("\r"));
assertFalse(re.test("M"));
assertFalse(re.test("c"));
assertFalse(re.test("\\"));
assertFalse(re.test("\x03"));  // I.e., read as \cc

// Here \c is followed by "]", which is not a control letter. The expectations
// below are the ones recorded for this revision of the engine.
re = /^[\c]]$/;
assertTrue(re.test("c]"));
assertFalse(re.test("\\]"));
assertFalse(re.test("\x1d"));  // ']' & 0x1f
assertFalse(re.test("\\]"));
assertFalse(re.test("\x03]"));  // I.e., read as \cc


// Test that we handle \s and \S correctly inside some bizarre
// character classes.
// In the first four classes "-" follows a class escape, so the assertions
// expect "-" and ":" to be treated as ordinary members rather than as the
// end points of a range.
re = /[\s-:]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));

re = /[\S-:]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));

re = /[^\s-:]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));

re = /[^\S-:]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));

// Plain and negated single-escape classes.
re = /[\s]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));

re = /[^\s]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));

re = /[\S]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));

re = /[^\S]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));

// \s together with \S covers every character; its negation covers none.
re = /[\s\S]/;
assertTrue(re.test('-'));
assertTrue(re.test(':'));
assertTrue(re.test(' '));
assertTrue(re.test('\t'));
assertTrue(re.test('\n'));
assertTrue(re.test('a'));
assertTrue(re.test('Z'));

re = /[^\s\S]/;
assertFalse(re.test('-'));
assertFalse(re.test(':'));
assertFalse(re.test(' '));
assertFalse(re.test('\t'));
assertFalse(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));

// Test beginning and end of line assertions with or without the
// multiline flag.
re = /^\d+/;
assertFalse(re.test("asdf\n123"));
re = /^\d+/m;
assertTrue(re.test("asdf\n123"));

re = /\d+$/;
assertFalse(re.test("123\nasdf"));
re = /\d+$/m;
assertTrue(re.test("123\nasdf"));

// Test that empty matches are handled correctly for multiline global
// regexps.
// The replace() expectation shows one match per line start, including the
// empty lines between adjacent terminators.
re = /^(.*)/mg;
assertEquals(3, "a\n\rb".match(re).length);
assertEquals("*a\n*b\r*c\n*\r*d\r*\n*e", "a\nb\rc\n\rd\r\ne".replace(re, "*$1"));

// Test that empty matches advance one character
re = new RegExp("", "g");
assertEquals("xAx", "A".replace(re, "x"));
assertEquals(3, String.fromCharCode(161).replace(re, "x").length);

// Test that we match the KJS behavior with regard to undefined constructor
// arguments:
re = new RegExp();
// KJS actually shows this as '//'.  Here we match the Firefox behavior (ie,
// giving a syntactically legal regexp literal).
assertEquals('/(?:)/', re.toString());
re = new RegExp(void 0);
assertEquals('/(?:)/', re.toString());
re.compile();
assertEquals('/(?:)/', re.toString());
// Unlike the constructor, compile() with an explicit undefined argument is
// expected here to use the text "undefined" as the pattern.
re.compile(void 0);
assertEquals('/undefined/', re.toString());


242// Check for lazy RegExp literal creation
243function lazyLiteral(doit) {
244  if (doit) return "".replace(/foo(/gi, "");
245  return true;
246}
247
248assertTrue(lazyLiteral(false));
249assertThrows("lazyLiteral(true)");
250
// Check $01 and $10
// With ten capture groups, "$10" refers to group 10, while "$15" is group 1
// followed by a literal "5" (there is no group 15). "$001" is not a group
// reference and is emitted verbatim.
re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)");
assertEquals("t", "123456789t".replace(re, "$10"), "$10");
assertEquals("15", "123456789t".replace(re, "$15"), "$15");
assertEquals("1", "123456789t".replace(re, "$01"), "$01");
assertEquals("$001", "123456789t".replace(re, "$001"), "$001");
// With a single capture group, "$0", "$00" and "$000" stay literal.
re = new RegExp("foo(.)");
assertEquals("bar$0", "foox".replace(re, "bar$0"), "$0");
assertEquals("bar$00", "foox".replace(re, "bar$00"), "$00");
assertEquals("bar$000", "foox".replace(re, "bar$000"), "$000");
assertEquals("barx", "foox".replace(re, "bar$01"), "$01 2");
assertEquals("barx5", "foox".replace(re, "bar$15"), "$15");

// A "$" assertion followed by a back reference or lookaround: it can only
// succeed at the end of the input, whatever follows it in the pattern.
assertFalse(/()foo$\1/.test("football"), "football1");
assertFalse(/foo$(?=ball)/.test("football"), "football2");
assertFalse(/foo$(?!bar)/.test("football"), "football3");
assertTrue(/()foo$\1/.test("foo"), "football4");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football5");
assertTrue(/()foo$(?!bar)/.test("foo"), "football6");
assertFalse(/(x?)foo$\1/.test("football"), "football7");
assertFalse(/foo$(?=ball)/.test("football"), "football8");
assertFalse(/foo$(?!bar)/.test("football"), "football9");
assertTrue(/(x?)foo$\1/.test("foo"), "football10");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football11");
assertTrue(/foo$(?!bar)/.test("foo"), "football12");

// Check that the back reference has two successors.  See
// BackReferenceNode::PropagateForward.
assertFalse(/f(o)\b\1/.test('foo'));
assertTrue(/f(o)\B\1/.test('foo'));

// Back-reference, ignore case:
// ASCII
assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "backref-ASCII");
assertFalse(/x(...)\1/i.test("xaaaaa"), "backref-ASCII-short");
assertTrue(/x((?:))\1\1x/i.test("xx"), "backref-ASCII-empty");
assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "backref-ASCII-uncaptured");
assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "backref-ASCII-backtrack");
assertEquals("xaBcAbCABCx,aBc",
             String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
             "backref-ASCII-twice");

// For every ASCII code, pair the character with the one that differs only in
// bit 0x20. For letters that is the other case, so a case-insensitive back
// reference must match; for everything else it must not.
for (var i = 0; i < 128; i++) {
  var testName = "backref-ASCII-char-" + i + "," + (i^0x20);
  var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20));
  var c = String.fromCharCode(i);
  if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
    assertTrue(test, testName);
  } else {
    assertFalse(test, testName);
  }
}

assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");

// Check decimal escapes doesn't overflow.
// (Note: \214 is interpreted as octal).
// I.e. the pattern is expected to mean "\x8c" followed by the literal
// digits "7483648".
assertEquals(/\2147483648/.exec("\x8c7483648"),
             ["\x8c7483648"],
             "Overflow decimal escape");


// Check numbers in quantifiers doesn't overflow and doesn't throw on
// too large numbers.
// The bounds straddle 2^31 - 1 (2147483647): far above it, just above it,
// and exactly at it.
assertFalse(/a{111111111111111111111111111111111111111111111}/.test('b'),
            "overlarge1");
assertFalse(/a{999999999999999999999999999999999999999999999}/.test('b'),
            "overlarge2");
assertFalse(/a{1,111111111111111111111111111111111111111111111}/.test('b'),
            "overlarge3");
assertFalse(/a{1,999999999999999999999999999999999999999999999}/.test('b'),
            "overlarge4");
assertFalse(/a{2147483648}/.test('b'),
            "overlarge5");
assertFalse(/a{21474836471}/.test('b'),
            "overlarge6");
assertFalse(/a{1,2147483648}/.test('b'),
            "overlarge7");
assertFalse(/a{1,21474836471}/.test('b'),
            "overlarge8");
assertFalse(/a{2147483648,2147483648}/.test('b'),
            "overlarge9");
assertFalse(/a{21474836471,21474836471}/.test('b'),
            "overlarge10");
assertFalse(/a{2147483647}/.test('b'),
            "overlarge11");
assertFalse(/a{1,2147483647}/.test('b'),
            "overlarge12");
assertTrue(/a{1,2147483647}/.test('a'),
            "overlarge13");
assertFalse(/a{2147483647,2147483647}/.test('a'),
            "overlarge14");


// Check that we don't read past the end of the string.
// Every subject is shorter than the pattern needs, so none may match.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));

// Test that merging of quick test masks gets it right.
// Each subject combines a digit accepted only by the first alternative with
// a suffix accepted only by the second, so no alternative may match.
assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');


// Don't hang on this one.
/[^\xfe-\xff]*/.test("");


// Build the pattern "a?a?...a?a" with 100000 optional prefixes.
var long = "a";
for (var i = 0; i < 100000; i++) {
  long = "a?" + long;
}
// Don't crash on this one, but maybe throw an exception.
// If something is thrown, it must be reported as a stack overflow.
try {
  RegExp(long).exec("a");
} catch (e) {
  assertTrue(String(e).indexOf("Stack overflow") >= 0, "overflow");
}


// Test that compile works on modified objects
var re = /re+/;
assertEquals("re+", re.source);
assertFalse(re.global);
assertFalse(re.ignoreCase);
assertFalse(re.multiline);
assertEquals(0, re.lastIndex);

re.compile("ro+", "gim");
assertEquals("ro+", re.source);
assertTrue(re.global);
assertTrue(re.ignoreCase);
assertTrue(re.multiline);
assertEquals(0, re.lastIndex);

// Add named and indexed expando properties and move lastIndex before
// recompiling.
re.lastIndex = 42;
re.someOtherProperty = 42;
re.someDeletableProperty = 42;
re[37] = 37;
re[42] = 42;

// compile() must replace source and flags and reset lastIndex to 0 ...
re.compile("ra+", "i");
assertEquals("ra+", re.source);
assertFalse(re.global);
assertTrue(re.ignoreCase);
assertFalse(re.multiline);
assertEquals(0, re.lastIndex);

// ... while leaving the expando properties untouched.
assertEquals(42, re.someOtherProperty);
assertEquals(42, re.someDeletableProperty);
assertEquals(37, re[37]);
assertEquals(42, re[42]);

// Same again after overwriting and deleting some of the expando properties.
re.lastIndex = -1;
re.someOtherProperty = 37;
re[42] = 37;
assertTrue(delete re[37]);
assertTrue(delete re.someDeletableProperty);
re.compile("ri+", "gm");

assertEquals("ri+", re.source);
assertTrue(re.global);
assertFalse(re.ignoreCase);
assertTrue(re.multiline);
assertEquals(0, re.lastIndex);
assertEquals(37, re.someOtherProperty);
assertEquals(37, re[42]);

// Test boundary-checks.
// Asserts that `re.test(input)` equals `test`.
//
//   re    - regular expression under test.
//   input - subject string passed to re.test().
//   test  - expected boolean result.
//
// The failure message has the form "test:<re>:<input>" so the failing
// pattern/subject pair can be identified.
function assertRegExpTest(re, input, test) {
  assertEquals(test, re.test(input), "test:" + re + ":" + input);
}

// "b" is a word character and "," is not, so \b holds next to a lone "b"
// (at either end of the input) and never next to a lone ",".
assertRegExpTest(/b\b/, "b", true);
assertRegExpTest(/b\b$/, "b", true);
assertRegExpTest(/\bb/, "b", true);
assertRegExpTest(/^\bb/, "b", true);
assertRegExpTest(/,\b/, ",", false);
assertRegExpTest(/,\b$/, ",", false);
assertRegExpTest(/\b,/, ",", false);
assertRegExpTest(/^\b,/, ",", false);

// \B is the exact complement of the cases above.
assertRegExpTest(/b\B/, "b", false);
assertRegExpTest(/b\B$/, "b", false);
assertRegExpTest(/\Bb/, "b", false);
assertRegExpTest(/^\Bb/, "b", false);
assertRegExpTest(/,\B/, ",", true);
assertRegExpTest(/,\B$/, ",", true);
assertRegExpTest(/\B,/, ",", true);
assertRegExpTest(/^\B,/, ",", true);

// Boundary decided by the character that follows in the subject.
assertRegExpTest(/b\b/, "b,", true);
assertRegExpTest(/b\b/, "ba", false);
assertRegExpTest(/b\B/, "b,", false);
assertRegExpTest(/b\B/, "ba", true);

assertRegExpTest(/b\Bb/, "bb", true);
assertRegExpTest(/b\bb/, "bb", false);

// Boundary next to a character class on the right, ...
assertRegExpTest(/b\b[,b]/, "bb", false);
assertRegExpTest(/b\B[,b]/, "bb", true);
assertRegExpTest(/b\b[,b]/, "b,", true);
assertRegExpTest(/b\B[,b]/, "b,", false);

// ... on the left, ...
assertRegExpTest(/[,b]\bb/, "bb", false);
assertRegExpTest(/[,b]\Bb/, "bb", true);
assertRegExpTest(/[,b]\bb/, ",b", true);
assertRegExpTest(/[,b]\Bb/, ",b", false);

// ... and on both sides.
assertRegExpTest(/[,b]\b[,b]/, "bb", false);
assertRegExpTest(/[,b]\B[,b]/, "bb", true);
assertRegExpTest(/[,b]\b[,b]/, ",b", true);
assertRegExpTest(/[,b]\B[,b]/, ",b", false);
assertRegExpTest(/[,b]\b[,b]/, "b,", true);
assertRegExpTest(/[,b]\B[,b]/, "b,", false);

// Test that caching of result doesn't share result objects.
// More iterations increases the chance of hitting a GC.
for (var i = 0; i < 100; i++) {
  var re = /x(y)z/;
  var res = re.exec("axyzb");
  assertTrue(!!res);
  assertEquals(2, res.length);
  assertEquals("xyz", res[0]);
  assertEquals("y", res[1]);
  assertEquals(1, res.index);
  assertEquals("axyzb", res.input);
  // Would be "Arglebargle" if this result object were the one mutated at the
  // end of the previous iteration.
  assertEquals(undefined, res.foobar);

  // Mutate the result; the next iteration must not observe these changes.
  res.foobar = "Arglebargle";
  res[3] = "Glopglyf";
  assertEquals("Arglebargle", res.foobar);
}
