1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28// Expect $Object = global.Object;
29// Expect $Array = global.Array;
30
31var $RegExp = global.RegExp;
32
33// A recursive descent parser for Patterns according to the grammar of
34// ECMA-262 15.10.1, with deviations noted below.
35function DoConstructRegExp(object, pattern, flags) {
36  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
37  if (IS_REGEXP(pattern)) {
38    if (!IS_UNDEFINED(flags)) {
39      throw MakeTypeError('regexp_flags', []);
40    }
41    flags = (pattern.global ? 'g' : '')
42        + (pattern.ignoreCase ? 'i' : '')
43        + (pattern.multiline ? 'm' : '');
44    pattern = pattern.source;
45  }
46
47  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
48  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
49
50  var global = false;
51  var ignoreCase = false;
52  var multiline = false;
53  for (var i = 0; i < flags.length; i++) {
54    var c = %_CallFunction(flags, i, StringCharAt);
55    switch (c) {
56      case 'g':
57        if (global) {
58          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
59        }
60        global = true;
61        break;
62      case 'i':
63        if (ignoreCase) {
64          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
65        }
66        ignoreCase = true;
67        break;
68      case 'm':
69        if (multiline) {
70          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
71        }
72        multiline = true;
73        break;
74      default:
75        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
76    }
77  }
78
79  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
80
81  // Call internal function to compile the pattern.
82  %RegExpCompile(object, pattern, flags);
83}
84
85
86function RegExpConstructor(pattern, flags) {
87  if (%_IsConstructCall()) {
88    DoConstructRegExp(this, pattern, flags);
89  } else {
90    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
91    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
92      return pattern;
93    }
94    return new $RegExp(pattern, flags);
95  }
96}
97
98// Deprecated RegExp.prototype.compile method.  We behave like the constructor
99// were called again.  In SpiderMonkey, this method returns the regexp object.
100// In JSC, it returns undefined.  For compatibility with JSC, we match their
101// behavior.
102function RegExpCompile(pattern, flags) {
103  // Both JSC and SpiderMonkey treat a missing pattern argument as the
104  // empty subject string, and an actual undefined value passed as the
105  // pattern as the string 'undefined'.  Note that JSC is inconsistent
106  // here, treating undefined values differently in
107  // RegExp.prototype.compile and in the constructor, where they are
108  // the empty string.  For compatibility with JSC, we match their
109  // behavior.
110  if (this == $RegExp.prototype) {
111    // We don't allow recompiling RegExp.prototype.
112    throw MakeTypeError('incompatible_method_receiver',
113                        ['RegExp.prototype.compile', this]);
114  }
115  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
116    DoConstructRegExp(this, 'undefined', flags);
117  } else {
118    DoConstructRegExp(this, pattern, flags);
119  }
120}
121
122
123function DoRegExpExec(regexp, string, index) {
124  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
125  if (result !== null) lastMatchInfoOverride = null;
126  return result;
127}
128
129
130function BuildResultFromMatchInfo(lastMatchInfo, s) {
131  var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
132  var start = lastMatchInfo[CAPTURE0];
133  var end = lastMatchInfo[CAPTURE1];
134  var result = %_RegExpConstructResult(numResults, start, s);
135  if (start + 1 == end) {
136    result[0] = %_StringCharAt(s, start);
137  } else {
138    result[0] = %_SubString(s, start, end);
139  }
140  var j = REGEXP_FIRST_CAPTURE + 2;
141  for (var i = 1; i < numResults; i++) {
142    start = lastMatchInfo[j++];
143    end = lastMatchInfo[j++];
144    if (end != -1) {
145      if (start + 1 == end) {
146        result[i] = %_StringCharAt(s, start);
147      } else {
148        result[i] = %_SubString(s, start, end);
149      }
150    } else {
151      // Make sure the element is present. Avoid reading the undefined
152      // property from the global object since this may change.
153      result[i] = void 0;
154    }
155  }
156  return result;
157}
158
159
160function RegExpExecNoTests(regexp, string, start) {
161  // Must be called with RegExp, string and positive integer as arguments.
162  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
163  if (matchInfo !== null) {
164    lastMatchInfoOverride = null;
165    return BuildResultFromMatchInfo(matchInfo, string);
166  }
167  return null;
168}
169
170
171function RegExpExec(string) {
172  if (!IS_REGEXP(this)) {
173    throw MakeTypeError('incompatible_method_receiver',
174                        ['RegExp.prototype.exec', this]);
175  }
176
177  string = TO_STRING_INLINE(string);
178  var lastIndex = this.lastIndex;
179
180  // Conversion is required by the ES5 specification (RegExp.prototype.exec
181  // algorithm, step 5) even if the value is discarded for non-global RegExps.
182  var i = TO_INTEGER(lastIndex);
183
184  var global = this.global;
185  if (global) {
186    if (i < 0 || i > string.length) {
187      this.lastIndex = 0;
188      return null;
189    }
190  } else {
191    i = 0;
192  }
193
194  %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
195  // matchIndices is either null or the lastMatchInfo array.
196  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
197
198  if (matchIndices === null) {
199    if (global) this.lastIndex = 0;
200    return null;
201  }
202
203  // Successful match.
204  lastMatchInfoOverride = null;
205  if (global) {
206    this.lastIndex = lastMatchInfo[CAPTURE1];
207  }
208  return BuildResultFromMatchInfo(matchIndices, string);
209}
210
211
212// One-element cache for the simplified test regexp.
213var regexp_key;
214var regexp_val;
215
216// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
217// that test is defined in terms of String.prototype.exec. However, it probably
218// means the original value of String.prototype.exec, which is what everybody
219// else implements.
220function RegExpTest(string) {
221  if (!IS_REGEXP(this)) {
222    throw MakeTypeError('incompatible_method_receiver',
223                        ['RegExp.prototype.test', this]);
224  }
225  string = TO_STRING_INLINE(string);
226
227  var lastIndex = this.lastIndex;
228
229  // Conversion is required by the ES5 specification (RegExp.prototype.exec
230  // algorithm, step 5) even if the value is discarded for non-global RegExps.
231  var i = TO_INTEGER(lastIndex);
232
233  if (this.global) {
234    if (i < 0 || i > string.length) {
235      this.lastIndex = 0;
236      return false;
237    }
238    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
239    // matchIndices is either null or the lastMatchInfo array.
240    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
241    if (matchIndices === null) {
242      this.lastIndex = 0;
243      return false;
244    }
245    lastMatchInfoOverride = null;
246    this.lastIndex = lastMatchInfo[CAPTURE1];
247    return true;
248  } else {
249    // Non-global regexp.
250    // Remove irrelevant preceeding '.*' in a non-global test regexp.
251    // The expression checks whether this.source starts with '.*' and
252    // that the third char is not a '?'.
253    var regexp = this;
254    if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
255        %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
256        %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
257      regexp = TrimRegExp(regexp);
258    }
259    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]);
260    // matchIndices is either null or the lastMatchInfo array.
261    var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
262    if (matchIndices === null) return false;
263    lastMatchInfoOverride = null;
264    return true;
265  }
266}
267
268function TrimRegExp(regexp) {
269  if (!%_ObjectEquals(regexp_key, regexp)) {
270    regexp_key = regexp;
271    regexp_val =
272      new $RegExp(SubString(regexp.source, 2, regexp.source.length),
273                  (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
274                                     : regexp.multiline ? "m" : ""));
275  }
276  return regexp_val;
277}
278
279
// RegExp.prototype.toString.  Returns '/' + source + '/' followed by the
// flag characters 'g', 'i' and 'm' for each flag that is set.
//
// Throws a TypeError if the receiver is not a RegExp, consistent with
// RegExp.prototype.exec and RegExp.prototype.test and as required for
// methods of the RegExp prototype object (ECMA-262, section 15.10.6).
function RegExpToString() {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError('incompatible_method_receiver',
                        ['RegExp.prototype.toString', this]);
  }
  // If this.source is an empty string, output /(?:)/.
  // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
  // ecma_2/RegExp/properties-001.js.
  var src = this.source ? this.source : '(?:)';
  var result = '/' + src + '/';
  if (this.global) result += 'g';
  if (this.ignoreCase) result += 'i';
  if (this.multiline) result += 'm';
  return result;
}
291
292
// Getters for the static properties lastMatch, lastParen, leftContext, and
// rightContext of the RegExp constructor.  Each is computed from the captures
// array and the subject string of the last successful match, unless
// lastMatchInfoOverride is set, in which case the override array is used.

// Getter for lastMatch: the text of the last successful match.
function RegExpGetLastMatch() {
  var override = lastMatchInfoOverride;
  if (override === null) {
    var subject = LAST_SUBJECT(lastMatchInfo);
    return SubString(subject,
                     lastMatchInfo[CAPTURE0],
                     lastMatchInfo[CAPTURE1]);
  }
  // In the override array the matched text is the first element.
  return override[0];
}
306
307
// Getter for lastParen: the text of the last capture of the last successful
// match, or '' when there were no captures or the capture is unset.
function RegExpGetLastParen() {
  var override = lastMatchInfoOverride;
  if (override) {
    // The element at length - 3 is returned; an override with three or
    // fewer elements is treated as having no captures.
    var override_length = override.length;
    return (override_length > 3) ? override[override_length - 3] : '';
  }

  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
  if (length <= 2) return '';  // There were no captures.

  // We match the SpiderMonkey behavior: return the substring defined by the
  // last pair (after the first pair) of elements of the capture array even if
  // it is empty.
  var subject = LAST_SUBJECT(lastMatchInfo);
  var from = lastMatchInfo[CAPTURE(length - 2)];
  var to = lastMatchInfo[CAPTURE(length - 1)];
  if (from == -1 || to == -1) return "";
  return SubString(subject, from, to);
}
327
328
// Getter for leftContext: the part of the last subject string that precedes
// the last successful match.
function RegExpGetLeftContext() {
  var override = lastMatchInfoOverride;
  if (override) {
    // The override array ends with the match position and the subject.
    var override_length = override.length;
    var match_position = override[override_length - 2];
    return SubString(override[override_length - 1], 0, match_position);
  }
  var match_start = lastMatchInfo[CAPTURE0];
  return SubString(LAST_SUBJECT(lastMatchInfo), 0, match_start);
}
342
343
// Getter for rightContext: the part of the last subject string that follows
// the last successful match.
function RegExpGetRightContext() {
  var start_index;
  var subject;
  if (!lastMatchInfoOverride) {
    start_index = lastMatchInfo[CAPTURE1];
    subject = LAST_SUBJECT(lastMatchInfo);
  } else {
    // The override array starts with the matched text and ends with the
    // match position followed by the subject string.
    var override = lastMatchInfoOverride;
    subject = override[override.length - 1];
    // The right context begins where the whole match ends, so the length of
    // the matched text (element 0) must be added to the match position.  The
    // element at length - 3 is the last capture when captures are present
    // and must not be used here.
    var match = override[0];
    start_index = override[override.length - 2] + match.length;
  }
  return SubString(subject, start_index, subject.length);
}
358
359
// The properties $1..$9 are the first nine capturing substrings of the last
// successful match, or ''.  RegExpMakeCaptureGetter builds the getter for
// capture number |n| and is called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) {
  // Position of the capture's (start, end) pair, counted in capture slots.
  var pair_index = n * 2;
  return function() {
    var override = lastMatchInfoOverride;
    if (override) {
      // The last two override elements are not captures.
      return (n < override.length - 2) ? override[n] : '';
    }
    if (pair_index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    var matchStart = lastMatchInfo[CAPTURE(pair_index)];
    var matchEnd = lastMatchInfo[CAPTURE(pair_index + 1)];
    var is_unset = matchStart == -1 || matchEnd == -1;
    return is_unset
        ? ''
        : SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
  };
}
377
378
// Property of the builtins object for recording the result of the last
// regexp match.  lastMatchInfo holds the matchIndices array of the last
// successful regexp match (start/end index pairs for the match and all
// captured substrings) together with the subject string of that match.
// Invariant: there are always at least two capture indices.
//
// Initial contents, in order:
//   2        REGEXP_NUMBER_OF_CAPTURES
//   ""       last subject
//   void 0   last input - settable with RegExpSetInput
//   0        REGEXP_FIRST_CAPTURE + 0
//   0        REGEXP_FIRST_CAPTURE + 1
var lastMatchInfo = new InternalArray(2, "", void 0, 0, 0);

// When not null, overrides lastMatchInfo with an array of actual substrings.
// Used internally by replace regexp with function.  The array has the format
// of an "apply" argument for a replacement function.
var lastMatchInfoOverride = null;
398
399// -------------------------------------------------------------------
400
401function SetUpRegExp() {
402  %CheckIsBootstrapping();
403  %FunctionSetInstanceClassName($RegExp, 'RegExp');
404  %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
405  %SetCode($RegExp, RegExpConstructor);
406
407  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
408    "exec", RegExpExec,
409    "test", RegExpTest,
410    "toString", RegExpToString,
411    "compile", RegExpCompile
412  ));
413
414  // The length of compile is 1 in SpiderMonkey.
415  %FunctionSetLength($RegExp.prototype.compile, 1);
416
417  // The properties input, $input, and $_ are aliases for each other.  When this
418  // value is set the value it is set to is coerced to a string.
419  // Getter and setter for the input.
420  var RegExpGetInput = function() {
421    var regExpInput = LAST_INPUT(lastMatchInfo);
422    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
423  };
424  var RegExpSetInput = function(string) {
425    LAST_INPUT(lastMatchInfo) = ToString(string);
426  };
427
428  %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
429                                    RegExpSetInput, DONT_DELETE);
430  %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
431                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
432  %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
433                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
434
435  // The properties multiline and $* are aliases for each other.  When this
436  // value is set in SpiderMonkey, the value it is set to is coerced to a
437  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
438  // the value of the expression 'RegExp.multiline = null' (for instance) is the
439  // boolean false (i.e., the value after coercion), while in V8 it is the value
440  // null (i.e., the value before coercion).
441
442  // Getter and setter for multiline.
443  var multiline = false;
444  var RegExpGetMultiline = function() { return multiline; };
445  var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
446
447  %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
448                                    RegExpSetMultiline, DONT_DELETE);
449  %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
450                                    RegExpSetMultiline,
451                                    DONT_ENUM | DONT_DELETE);
452
453
454  var NoOpSetter = function(ignored) {};
455
456
457  // Static properties set by a successful match.
458  %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
459                                    NoOpSetter, DONT_DELETE);
460  %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
461                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
462  %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
463                                    NoOpSetter, DONT_DELETE);
464  %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
465                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
466  %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
467                                    RegExpGetLeftContext, NoOpSetter,
468                                    DONT_DELETE);
469  %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
470                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
471  %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
472                                    RegExpGetRightContext, NoOpSetter,
473                                    DONT_DELETE);
474  %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
475                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
476
477  for (var i = 1; i < 10; ++i) {
478    %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
479                                      RegExpMakeCaptureGetter(i), NoOpSetter,
480                                      DONT_DELETE);
481  }
482}
483
484SetUpRegExp();
485