1// Copyright 2012 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28// Expect $Object = global.Object; 29// Expect $Array = global.Array; 30 31var $RegExp = global.RegExp; 32 33// A recursive descent parser for Patterns according to the grammar of 34// ECMA-262 15.10.1, with deviations noted below. 35function DoConstructRegExp(object, pattern, flags) { 36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 37 if (IS_REGEXP(pattern)) { 38 if (!IS_UNDEFINED(flags)) { 39 throw MakeTypeError('regexp_flags', []); 40 } 41 flags = (pattern.global ? 'g' : '') 42 + (pattern.ignoreCase ? 'i' : '') 43 + (pattern.multiline ? 'm' : ''); 44 pattern = pattern.source; 45 } 46 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 49 50 var global = false; 51 var ignoreCase = false; 52 var multiline = false; 53 for (var i = 0; i < flags.length; i++) { 54 var c = %_CallFunction(flags, i, StringCharAt); 55 switch (c) { 56 case 'g': 57 if (global) { 58 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 59 } 60 global = true; 61 break; 62 case 'i': 63 if (ignoreCase) { 64 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 65 } 66 ignoreCase = true; 67 break; 68 case 'm': 69 if (multiline) { 70 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 71 } 72 multiline = true; 73 break; 74 default: 75 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 76 } 77 } 78 79 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline); 80 81 // Call internal function to compile the pattern. 82 %RegExpCompile(object, pattern, flags); 83} 84 85 86function RegExpConstructor(pattern, flags) { 87 if (%_IsConstructCall()) { 88 DoConstructRegExp(this, pattern, flags); 89 } else { 90 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 91 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 92 return pattern; 93 } 94 return new $RegExp(pattern, flags); 95 } 96} 97 98// Deprecated RegExp.prototype.compile method. We behave like the constructor 99// were called again. In SpiderMonkey, this method returns the regexp object. 100// In JSC, it returns undefined. For compatibility with JSC, we match their 101// behavior. 102function RegExpCompile(pattern, flags) { 103 // Both JSC and SpiderMonkey treat a missing pattern argument as the 104 // empty subject string, and an actual undefined value passed as the 105 // pattern as the string 'undefined'. Note that JSC is inconsistent 106 // here, treating undefined values differently in 107 // RegExp.prototype.compile and in the constructor, where they are 108 // the empty string. For compatibility with JSC, we match their 109 // behavior. 110 if (this == $RegExp.prototype) { 111 // We don't allow recompiling RegExp.prototype. 112 throw MakeTypeError('incompatible_method_receiver', 113 ['RegExp.prototype.compile', this]); 114 } 115 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 116 DoConstructRegExp(this, 'undefined', flags); 117 } else { 118 DoConstructRegExp(this, pattern, flags); 119 } 120} 121 122 123function DoRegExpExec(regexp, string, index) { 124 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 125 if (result !== null) lastMatchInfoOverride = null; 126 return result; 127} 128 129 130function BuildResultFromMatchInfo(lastMatchInfo, s) { 131 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; 132 var start = lastMatchInfo[CAPTURE0]; 133 var end = lastMatchInfo[CAPTURE1]; 134 var result = %_RegExpConstructResult(numResults, start, s); 135 if (start + 1 == end) { 136 result[0] = %_StringCharAt(s, start); 137 } else { 138 result[0] = %_SubString(s, start, end); 139 } 140 var j = REGEXP_FIRST_CAPTURE + 2; 141 for (var i = 1; i < numResults; i++) { 142 start = lastMatchInfo[j++]; 143 end = lastMatchInfo[j++]; 144 if (end != -1) { 145 if (start + 1 == end) { 146 result[i] = %_StringCharAt(s, start); 147 } else { 148 result[i] = %_SubString(s, start, end); 149 } 150 } else { 151 // Make sure the element is present. Avoid reading the undefined 152 // property from the global object since this may change. 153 result[i] = void 0; 154 } 155 } 156 return result; 157} 158 159 160function RegExpExecNoTests(regexp, string, start) { 161 // Must be called with RegExp, string and positive integer as arguments. 162 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 163 if (matchInfo !== null) { 164 lastMatchInfoOverride = null; 165 return BuildResultFromMatchInfo(matchInfo, string); 166 } 167 return null; 168} 169 170 171function RegExpExec(string) { 172 if (!IS_REGEXP(this)) { 173 throw MakeTypeError('incompatible_method_receiver', 174 ['RegExp.prototype.exec', this]); 175 } 176 177 string = TO_STRING_INLINE(string); 178 var lastIndex = this.lastIndex; 179 180 // Conversion is required by the ES5 specification (RegExp.prototype.exec 181 // algorithm, step 5) even if the value is discarded for non-global RegExps. 182 var i = TO_INTEGER(lastIndex); 183 184 var global = this.global; 185 if (global) { 186 if (i < 0 || i > string.length) { 187 this.lastIndex = 0; 188 return null; 189 } 190 } else { 191 i = 0; 192 } 193 194 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 195 // matchIndices is either null or the lastMatchInfo array. 196 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 197 198 if (matchIndices === null) { 199 if (global) this.lastIndex = 0; 200 return null; 201 } 202 203 // Successful match. 204 lastMatchInfoOverride = null; 205 if (global) { 206 this.lastIndex = lastMatchInfo[CAPTURE1]; 207 } 208 return BuildResultFromMatchInfo(matchIndices, string); 209} 210 211 212// One-element cache for the simplified test regexp. 213var regexp_key; 214var regexp_val; 215 216// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 217// that test is defined in terms of String.prototype.exec. However, it probably 218// means the original value of String.prototype.exec, which is what everybody 219// else implements. 220function RegExpTest(string) { 221 if (!IS_REGEXP(this)) { 222 throw MakeTypeError('incompatible_method_receiver', 223 ['RegExp.prototype.test', this]); 224 } 225 string = TO_STRING_INLINE(string); 226 227 var lastIndex = this.lastIndex; 228 229 // Conversion is required by the ES5 specification (RegExp.prototype.exec 230 // algorithm, step 5) even if the value is discarded for non-global RegExps. 231 var i = TO_INTEGER(lastIndex); 232 233 if (this.global) { 234 if (i < 0 || i > string.length) { 235 this.lastIndex = 0; 236 return false; 237 } 238 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); 239 // matchIndices is either null or the lastMatchInfo array. 240 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 241 if (matchIndices === null) { 242 this.lastIndex = 0; 243 return false; 244 } 245 lastMatchInfoOverride = null; 246 this.lastIndex = lastMatchInfo[CAPTURE1]; 247 return true; 248 } else { 249 // Non-global regexp. 250 // Remove irrelevant preceeding '.*' in a non-global test regexp. 251 // The expression checks whether this.source starts with '.*' and 252 // that the third char is not a '?'. 253 var regexp = this; 254 if (%_StringCharCodeAt(regexp.source, 0) == 46 && // '.' 255 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' 256 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' 257 regexp = TrimRegExp(regexp); 258 } 259 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]); 260 // matchIndices is either null or the lastMatchInfo array. 261 var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo); 262 if (matchIndices === null) return false; 263 lastMatchInfoOverride = null; 264 return true; 265 } 266} 267 268function TrimRegExp(regexp) { 269 if (!%_ObjectEquals(regexp_key, regexp)) { 270 regexp_key = regexp; 271 regexp_val = 272 new $RegExp(SubString(regexp.source, 2, regexp.source.length), 273 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" 274 : regexp.multiline ? "m" : "")); 275 } 276 return regexp_val; 277} 278 279 280function RegExpToString() { 281 // If this.source is an empty string, output /(?:)/. 282 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 283 // ecma_2/RegExp/properties-001.js. 284 var src = this.source ? this.source : '(?:)'; 285 var result = '/' + src + '/'; 286 if (this.global) result += 'g'; 287 if (this.ignoreCase) result += 'i'; 288 if (this.multiline) result += 'm'; 289 return result; 290} 291 292 293// Getters for the static properties lastMatch, lastParen, leftContext, and 294// rightContext of the RegExp constructor. The properties are computed based 295// on the captures array of the last successful match and the subject string 296// of the last successful match. 297function RegExpGetLastMatch() { 298 if (lastMatchInfoOverride !== null) { 299 return lastMatchInfoOverride[0]; 300 } 301 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 302 return SubString(regExpSubject, 303 lastMatchInfo[CAPTURE0], 304 lastMatchInfo[CAPTURE1]); 305} 306 307 308function RegExpGetLastParen() { 309 if (lastMatchInfoOverride) { 310 var override = lastMatchInfoOverride; 311 if (override.length <= 3) return ''; 312 return override[override.length - 3]; 313 } 314 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 315 if (length <= 2) return ''; // There were no captures. 316 // We match the SpiderMonkey behavior: return the substring defined by the 317 // last pair (after the first pair) of elements of the capture array even if 318 // it is empty. 319 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 320 var start = lastMatchInfo[CAPTURE(length - 2)]; 321 var end = lastMatchInfo[CAPTURE(length - 1)]; 322 if (start != -1 && end != -1) { 323 return SubString(regExpSubject, start, end); 324 } 325 return ""; 326} 327 328 329function RegExpGetLeftContext() { 330 var start_index; 331 var subject; 332 if (!lastMatchInfoOverride) { 333 start_index = lastMatchInfo[CAPTURE0]; 334 subject = LAST_SUBJECT(lastMatchInfo); 335 } else { 336 var override = lastMatchInfoOverride; 337 start_index = override[override.length - 2]; 338 subject = override[override.length - 1]; 339 } 340 return SubString(subject, 0, start_index); 341} 342 343 344function RegExpGetRightContext() { 345 var start_index; 346 var subject; 347 if (!lastMatchInfoOverride) { 348 start_index = lastMatchInfo[CAPTURE1]; 349 subject = LAST_SUBJECT(lastMatchInfo); 350 } else { 351 var override = lastMatchInfoOverride; 352 subject = override[override.length - 1]; 353 var pattern = override[override.length - 3]; 354 start_index = override[override.length - 2] + pattern.length; 355 } 356 return SubString(subject, start_index, subject.length); 357} 358 359 360// The properties $1..$9 are the first nine capturing substrings of the last 361// successful match, or ''. The function RegExpMakeCaptureGetter will be 362// called with indices from 1 to 9. 363function RegExpMakeCaptureGetter(n) { 364 return function() { 365 if (lastMatchInfoOverride) { 366 if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n]; 367 return ''; 368 } 369 var index = n * 2; 370 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 371 var matchStart = lastMatchInfo[CAPTURE(index)]; 372 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 373 if (matchStart == -1 || matchEnd == -1) return ''; 374 return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 375 }; 376} 377 378 379// Property of the builtins object for recording the result of the last 380// regexp match. The property lastMatchInfo includes the matchIndices 381// array of the last successful regexp match (an array of start/end index 382// pairs for the match and all the captured substrings), the invariant is 383// that there are at least two capture indeces. The array also contains 384// the subject string for the last successful match. 385var lastMatchInfo = new InternalArray( 386 2, // REGEXP_NUMBER_OF_CAPTURES 387 "", // Last subject. 388 void 0, // Last input - settable with RegExpSetInput. 389 0, // REGEXP_FIRST_CAPTURE + 0 390 0 // REGEXP_FIRST_CAPTURE + 1 391); 392 393// Override last match info with an array of actual substrings. 394// Used internally by replace regexp with function. 395// The array has the format of an "apply" argument for a replacement 396// function. 397var lastMatchInfoOverride = null; 398 399// ------------------------------------------------------------------- 400 401function SetUpRegExp() { 402 %CheckIsBootstrapping(); 403 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 404 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 405 %SetCode($RegExp, RegExpConstructor); 406 407 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 408 "exec", RegExpExec, 409 "test", RegExpTest, 410 "toString", RegExpToString, 411 "compile", RegExpCompile 412 )); 413 414 // The length of compile is 1 in SpiderMonkey. 415 %FunctionSetLength($RegExp.prototype.compile, 1); 416 417 // The properties input, $input, and $_ are aliases for each other. When this 418 // value is set the value it is set to is coerced to a string. 419 // Getter and setter for the input. 420 var RegExpGetInput = function() { 421 var regExpInput = LAST_INPUT(lastMatchInfo); 422 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 423 }; 424 var RegExpSetInput = function(string) { 425 LAST_INPUT(lastMatchInfo) = ToString(string); 426 }; 427 428 %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput, 429 RegExpSetInput, DONT_DELETE); 430 %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput, 431 RegExpSetInput, DONT_ENUM | DONT_DELETE); 432 %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput, 433 RegExpSetInput, DONT_ENUM | DONT_DELETE); 434 435 // The properties multiline and $* are aliases for each other. When this 436 // value is set in SpiderMonkey, the value it is set to is coerced to a 437 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 438 // the value of the expression 'RegExp.multiline = null' (for instance) is the 439 // boolean false (i.e., the value after coercion), while in V8 it is the value 440 // null (i.e., the value before coercion). 441 442 // Getter and setter for multiline. 443 var multiline = false; 444 var RegExpGetMultiline = function() { return multiline; }; 445 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; 446 447 %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline, 448 RegExpSetMultiline, DONT_DELETE); 449 %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline, 450 RegExpSetMultiline, 451 DONT_ENUM | DONT_DELETE); 452 453 454 var NoOpSetter = function(ignored) {}; 455 456 457 // Static properties set by a successful match. 458 %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch, 459 NoOpSetter, DONT_DELETE); 460 %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch, 461 NoOpSetter, DONT_ENUM | DONT_DELETE); 462 %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen, 463 NoOpSetter, DONT_DELETE); 464 %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen, 465 NoOpSetter, DONT_ENUM | DONT_DELETE); 466 %DefineOrRedefineAccessorProperty($RegExp, 'leftContext', 467 RegExpGetLeftContext, NoOpSetter, 468 DONT_DELETE); 469 %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext, 470 NoOpSetter, DONT_ENUM | DONT_DELETE); 471 %DefineOrRedefineAccessorProperty($RegExp, 'rightContext', 472 RegExpGetRightContext, NoOpSetter, 473 DONT_DELETE); 474 %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext, 475 NoOpSetter, DONT_ENUM | DONT_DELETE); 476 477 for (var i = 1; i < 10; ++i) { 478 %DefineOrRedefineAccessorProperty($RegExp, '$' + i, 479 RegExpMakeCaptureGetter(i), NoOpSetter, 480 DONT_DELETE); 481 } 482} 483 484SetUpRegExp(); 485