1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
// This file relies on the fact that the following declarations have been
// made in runtime.js:
30// var $Object = global.Object;
31// var $Array = global.Array;
32
// Local alias for the RegExp constructor taken from the global object.  The
// rest of this file refers to the constructor and its prototype through this
// alias.
var $RegExp = global.RegExp;
34
35// -------------------------------------------------------------------
36
37// A recursive descent parser for Patterns according to the grammar of
38// ECMA-262 15.10.1, with deviations noted below.
39function DoConstructRegExp(object, pattern, flags) {
40  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
41  if (IS_REGEXP(pattern)) {
42    if (!IS_UNDEFINED(flags)) {
43      throw MakeTypeError('regexp_flags', []);
44    }
45    flags = (pattern.global ? 'g' : '')
46        + (pattern.ignoreCase ? 'i' : '')
47        + (pattern.multiline ? 'm' : '');
48    pattern = pattern.source;
49  }
50
51  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
52  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
53
54  var global = false;
55  var ignoreCase = false;
56  var multiline = false;
57  for (var i = 0; i < flags.length; i++) {
58    var c = %_CallFunction(flags, i, StringCharAt);
59    switch (c) {
60      case 'g':
61        if (global) {
62          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
63        }
64        global = true;
65        break;
66      case 'i':
67        if (ignoreCase) {
68          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
69        }
70        ignoreCase = true;
71        break;
72      case 'm':
73        if (multiline) {
74          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
75        }
76        multiline = true;
77        break;
78      default:
79        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
80    }
81  }
82
83  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
84
85  // Call internal function to compile the pattern.
86  %RegExpCompile(object, pattern, flags);
87}
88
89
90function RegExpConstructor(pattern, flags) {
91  if (%_IsConstructCall()) {
92    DoConstructRegExp(this, pattern, flags);
93  } else {
94    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
95    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
96      return pattern;
97    }
98    return new $RegExp(pattern, flags);
99  }
100}
101
102// Deprecated RegExp.prototype.compile method.  We behave like the constructor
103// were called again.  In SpiderMonkey, this method returns the regexp object.
104// In JSC, it returns undefined.  For compatibility with JSC, we match their
105// behavior.
106function RegExpCompile(pattern, flags) {
107  // Both JSC and SpiderMonkey treat a missing pattern argument as the
108  // empty subject string, and an actual undefined value passed as the
109  // pattern as the string 'undefined'.  Note that JSC is inconsistent
110  // here, treating undefined values differently in
111  // RegExp.prototype.compile and in the constructor, where they are
112  // the empty string.  For compatibility with JSC, we match their
113  // behavior.
114  if (this == $RegExp.prototype) {
115    // We don't allow recompiling RegExp.prototype.
116    throw MakeTypeError('incompatible_method_receiver',
117                        ['RegExp.prototype.compile', this]);
118  }
119  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
120    DoConstructRegExp(this, 'undefined', flags);
121  } else {
122    DoConstructRegExp(this, pattern, flags);
123  }
124}
125
126
127function DoRegExpExec(regexp, string, index) {
128  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
129  if (result !== null) lastMatchInfoOverride = null;
130  return result;
131}
132
133
134function BuildResultFromMatchInfo(lastMatchInfo, s) {
135  var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
136  var start = lastMatchInfo[CAPTURE0];
137  var end = lastMatchInfo[CAPTURE1];
138  var result = %_RegExpConstructResult(numResults, start, s);
139  result[0] = %_SubString(s, start, end);
140  var j = REGEXP_FIRST_CAPTURE + 2;
141  for (var i = 1; i < numResults; i++) {
142    start = lastMatchInfo[j++];
143    if (start != -1) {
144      end = lastMatchInfo[j];
145      result[i] = %_SubString(s, start, end);
146    }
147    j++;
148  }
149  return result;
150}
151
152
153function RegExpExecNoTests(regexp, string, start) {
154  // Must be called with RegExp, string and positive integer as arguments.
155  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
156  if (matchInfo !== null) {
157    lastMatchInfoOverride = null;
158    return BuildResultFromMatchInfo(matchInfo, string);
159  }
160  regexp.lastIndex = 0;
161  return null;
162}
163
164
165function RegExpExec(string) {
166  if (!IS_REGEXP(this)) {
167    throw MakeTypeError('incompatible_method_receiver',
168                        ['RegExp.prototype.exec', this]);
169  }
170
171  string = TO_STRING_INLINE(string);
172  var lastIndex = this.lastIndex;
173
174  // Conversion is required by the ES5 specification (RegExp.prototype.exec
175  // algorithm, step 5) even if the value is discarded for non-global RegExps.
176  var i = TO_INTEGER(lastIndex);
177
178  var global = this.global;
179  if (global) {
180    if (i < 0 || i > string.length) {
181      this.lastIndex = 0;
182      return null;
183    }
184  } else {
185    i = 0;
186  }
187
188  %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
189  // matchIndices is either null or the lastMatchInfo array.
190  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
191
192  if (matchIndices === null) {
193    this.lastIndex = 0;
194    return null;
195  }
196
197  // Successful match.
198  lastMatchInfoOverride = null;
199  if (global) {
200    this.lastIndex = lastMatchInfo[CAPTURE1];
201  }
202  return BuildResultFromMatchInfo(matchIndices, string);
203}
204
205
// One-element cache for the simplified test regexp, maintained by
// TrimRegExp: regexp_key is the regexp most recently passed to TrimRegExp
// and regexp_val is the regexp that was built for it with the leading '.*'
// removed.
var regexp_key;
var regexp_val;
209
210// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
211// that test is defined in terms of String.prototype.exec. However, it probably
212// means the original value of String.prototype.exec, which is what everybody
213// else implements.
214function RegExpTest(string) {
215  if (!IS_REGEXP(this)) {
216    throw MakeTypeError('incompatible_method_receiver',
217                        ['RegExp.prototype.test', this]);
218  }
219  string = TO_STRING_INLINE(string);
220
221  var lastIndex = this.lastIndex;
222
223  // Conversion is required by the ES5 specification (RegExp.prototype.exec
224  // algorithm, step 5) even if the value is discarded for non-global RegExps.
225  var i = TO_INTEGER(lastIndex);
226
227  if (this.global) {
228    if (i < 0 || i > string.length) {
229      this.lastIndex = 0;
230      return false;
231    }
232    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
233    // matchIndices is either null or the lastMatchInfo array.
234    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
235    if (matchIndices === null) {
236      this.lastIndex = 0;
237      return false;
238    }
239    lastMatchInfoOverride = null;
240    this.lastIndex = lastMatchInfo[CAPTURE1];
241    return true;
242  } else {
243    // Non-global regexp.
244    // Remove irrelevant preceeding '.*' in a non-global test regexp.
245    // The expression checks whether this.source starts with '.*' and
246    // that the third char is not a '?'.
247    var regexp = this;
248    if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
249        %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
250        %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
251      regexp = TrimRegExp(regexp);
252    }
253    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]);
254    // matchIndices is either null or the lastMatchInfo array.
255    var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
256    if (matchIndices === null) {
257      this.lastIndex = 0;
258      return false;
259    }
260    lastMatchInfoOverride = null;
261    return true;
262  }
263}
264
265function TrimRegExp(regexp) {
266  if (!%_ObjectEquals(regexp_key, regexp)) {
267    regexp_key = regexp;
268    regexp_val =
269      new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
270                  (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
271                                     : regexp.multiline ? "m" : ""));
272  }
273  return regexp_val;
274}
275
276
// RegExp.prototype.toString: returns the receiver in literal form, i.e. its
// source between slashes followed by the flags in the order 'g', 'i', 'm'.
// Throws a TypeError if the receiver is not a RegExp.
function RegExpToString() {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError('incompatible_method_receiver',
                        ['RegExp.prototype.toString', this]);
  }
  var source = this.source;
  var flags = (this.global ? 'g' : '')
      + (this.ignoreCase ? 'i' : '')
      + (this.multiline ? 'm' : '');
  return '/' + source + '/' + flags;
}
288
289
290// Getters for the static properties lastMatch, lastParen, leftContext, and
291// rightContext of the RegExp constructor.  The properties are computed based
292// on the captures array of the last successful match and the subject string
293// of the last successful match.
294function RegExpGetLastMatch() {
295  if (lastMatchInfoOverride !== null) {
296    return OVERRIDE_MATCH(lastMatchInfoOverride);
297  }
298  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
299  return %_SubString(regExpSubject,
300                     lastMatchInfo[CAPTURE0],
301                     lastMatchInfo[CAPTURE1]);
302}
303
304
305function RegExpGetLastParen() {
306  if (lastMatchInfoOverride) {
307    var override = lastMatchInfoOverride;
308    if (override.length <= 3) return '';
309    return override[override.length - 3];
310  }
311  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
312  if (length <= 2) return '';  // There were no captures.
313  // We match the SpiderMonkey behavior: return the substring defined by the
314  // last pair (after the first pair) of elements of the capture array even if
315  // it is empty.
316  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
317  var start = lastMatchInfo[CAPTURE(length - 2)];
318  var end = lastMatchInfo[CAPTURE(length - 1)];
319  if (start != -1 && end != -1) {
320    return %_SubString(regExpSubject, start, end);
321  }
322  return "";
323}
324
325
326function RegExpGetLeftContext() {
327  var start_index;
328  var subject;
329  if (!lastMatchInfoOverride) {
330    start_index = lastMatchInfo[CAPTURE0];
331    subject = LAST_SUBJECT(lastMatchInfo);
332  } else {
333    var override = lastMatchInfoOverride;
334    start_index = OVERRIDE_POS(override);
335    subject = OVERRIDE_SUBJECT(override);
336  }
337  return %_SubString(subject, 0, start_index);
338}
339
340
341function RegExpGetRightContext() {
342  var start_index;
343  var subject;
344  if (!lastMatchInfoOverride) {
345    start_index = lastMatchInfo[CAPTURE1];
346    subject = LAST_SUBJECT(lastMatchInfo);
347  } else {
348    var override = lastMatchInfoOverride;
349    subject = OVERRIDE_SUBJECT(override);
350    var match = OVERRIDE_MATCH(override);
351    start_index = OVERRIDE_POS(override) + match.length;
352  }
353  return %_SubString(subject, start_index, subject.length);
354}
355
356
357// The properties $1..$9 are the first nine capturing substrings of the last
358// successful match, or ''.  The function RegExpMakeCaptureGetter will be
359// called with indices from 1 to 9.
360function RegExpMakeCaptureGetter(n) {
361  return function() {
362    if (lastMatchInfoOverride) {
363      if (n < lastMatchInfoOverride.length - 2) {
364        return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
365      }
366      return '';
367    }
368    var index = n * 2;
369    if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
370    var matchStart = lastMatchInfo[CAPTURE(index)];
371    var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
372    if (matchStart == -1 || matchEnd == -1) return '';
373    return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
374  };
375}
376
377
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match and the last input.
// The initial value describes an empty match at position 0 of the empty
// subject string.
var lastMatchInfo = new InternalPackedArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    void 0,            // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);

// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.  While it is non-null the static RegExp getters in this file
// read from it instead of lastMatchInfo; every successful match performed
// in this file resets it to null.
var lastMatchInfoOverride = null;
397
398// -------------------------------------------------------------------
399
400function SetUpRegExp() {
401  %CheckIsBootstrapping();
402  %FunctionSetInstanceClassName($RegExp, 'RegExp');
403  %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
404  %SetCode($RegExp, RegExpConstructor);
405
406  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
407    "exec", RegExpExec,
408    "test", RegExpTest,
409    "toString", RegExpToString,
410    "compile", RegExpCompile
411  ));
412
413  // The length of compile is 1 in SpiderMonkey.
414  %FunctionSetLength($RegExp.prototype.compile, 1);
415
416  // The properties input, $input, and $_ are aliases for each other.  When this
417  // value is set the value it is set to is coerced to a string.
418  // Getter and setter for the input.
419  var RegExpGetInput = function() {
420    var regExpInput = LAST_INPUT(lastMatchInfo);
421    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
422  };
423  var RegExpSetInput = function(string) {
424    LAST_INPUT(lastMatchInfo) = ToString(string);
425  };
426
427  %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
428  %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
429                                    RegExpSetInput, DONT_DELETE);
430  %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
431                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
432  %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
433                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
434
435  // The properties multiline and $* are aliases for each other.  When this
436  // value is set in SpiderMonkey, the value it is set to is coerced to a
437  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
438  // the value of the expression 'RegExp.multiline = null' (for instance) is the
439  // boolean false (i.e., the value after coercion), while in V8 it is the value
440  // null (i.e., the value before coercion).
441
442  // Getter and setter for multiline.
443  var multiline = false;
444  var RegExpGetMultiline = function() { return multiline; };
445  var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
446
447  %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
448                                    RegExpSetMultiline, DONT_DELETE);
449  %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
450                                    RegExpSetMultiline,
451                                    DONT_ENUM | DONT_DELETE);
452
453
454  var NoOpSetter = function(ignored) {};
455
456
457  // Static properties set by a successful match.
458  %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
459                                    NoOpSetter, DONT_DELETE);
460  %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
461                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
462  %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
463                                    NoOpSetter, DONT_DELETE);
464  %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
465                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
466  %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
467                                    RegExpGetLeftContext, NoOpSetter,
468                                    DONT_DELETE);
469  %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
470                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
471  %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
472                                    RegExpGetRightContext, NoOpSetter,
473                                    DONT_DELETE);
474  %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
475                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
476
477  for (var i = 1; i < 10; ++i) {
478    %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
479                                      RegExpMakeCaptureGetter(i), NoOpSetter,
480                                      DONT_DELETE);
481  }
482  %ToFastProperties($RegExp);
483}
484
485SetUpRegExp();
486