16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org************************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2002-2013 International Business Machines Corporation * 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* and others. All rights reserved. * 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org************************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// file: rematch.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Contains the implementation of class RegexMatcher, 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// which is one of the main API classes for the ICU regular expression package. 
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_REGULAR_EXPRESSIONS 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/regex.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/rbbi.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr32.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvectr64.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regeximp.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regexst.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "regextxt.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase.h" 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// #include <malloc.h> // Needed for heapcheck testing 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Find progress callback 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ---------------------- 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Macro to inline test & call to ReportFindProgress(). Eliminates unnecessary function call. 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define REGEXFINDPROGRESS_INTERRUPT(pos, status) \ 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE) 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Smart Backtracking 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ------------------ 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// When a failure would go back to a LOOP_C instruction, 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// strings, characters, and setrefs scan backwards for a valid start 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// character themselves, pop the stack, and save state, emulating the 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// LOOP_C's effect but assured that the next character of input is a 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// possible matching character. 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Good idea in theory; unfortunately it only helps out a few specific 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// cases and slows the engine down a little in the rest. 
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Default limit for the size of the back track stack, to avoid system 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// failures causedby heap exhaustion. Units are in 32 bit words, not bytes. 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This value puts ICU's limits higher than most other regexp implementations, 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// which use recursion rather than the heap, and take more storage per 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// backtrack point. 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000; 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Time limit counter constant. 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Time limits for expression evaluation are in terms of quanta of work by 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// the engine, each of which is 10,000 state saves. 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This constant determines that state saves per tick number. 
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const int32_t TIMER_INITIAL_VALUE = 10000; 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//----------------------------------------------------------------------------- 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Constructor and Destructor 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//----------------------------------------------------------------------------- 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const RegexPattern *pat) { 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = U_ZERO_ERROR; 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(fDeferredStatus); 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pat==NULL) { 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR; 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = pat; 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus); 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &input, 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t flags, UErrorCode &status) { 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(status); 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = fPatternOwned; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText inputText = UTEXT_INITIALIZER; 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openConstUnicodeString(&inputText, &input, &status); 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init2(&inputText, status); 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&inputText); 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputUniStrMaybeMutable = TRUE; 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(UText *regexp, UText *input, 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t flags, UErrorCode &status) { 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(status); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = fPatternOwned; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init2(input, status); 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(const UnicodeString ®exp, 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t flags, UErrorCode &status) { 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(status); 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = fPatternOwned; 
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init2(RegexStaticSets::gStaticSets->fEmptyText, status); 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::RegexMatcher(UText *regexp, 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t flags, UErrorCode &status) { 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(status); 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError pe; 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = fPatternOwned; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init2(RegexStaticSets::gStaticSets->fEmptyText, status); 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher::~RegexMatcher() { 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fStack; 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fData != fSmallData) { 
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(fData); 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = NULL; 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPatternOwned) { 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fPatternOwned; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = NULL; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = NULL; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInput) { 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fInput; 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputText) { 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(fInputText); 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fAltInputText) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(fAltInputText); 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #if UCONFIG_NO_BREAK_ITERATION==0 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fWordBreakItr; 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #endif 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// init() common initialization for use by all constructors. 
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Initialize all fields, get the object into a consistent state. 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This must be done even when the initial status shows an error, 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// so that the object is initialized sufficiently well for the destructor 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// to run safely. 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::init(UErrorCode &status) { 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern = NULL; 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPatternOwned = NULL; 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrameSize = 0; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionStart = 0; 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionLimit = 0; 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorStart = 0; 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorLimit = 0; 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookStart = 0; 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookLimit = 0; 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = 0; 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = 0; 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTransparentBounds = FALSE; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchoringBounds = TRUE; 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchStart = 0; 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = 0; 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = -1; 
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAppendPosition = 0; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = FALSE; 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = FALSE; 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack = NULL; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrame = NULL; 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTimeLimit = 0; 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTime = 0; 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTickCounter = 0; 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStackLimit = DEFAULT_BACKTRACK_STACK_CAPACITY; 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCallbackFn = NULL; 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCallbackContext = NULL; 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFindProgressCallbackFn = NULL; 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFindProgressCallbackContext = NULL; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTraceDebug = FALSE; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = status; 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = fSmallData; 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fWordBreakItr = NULL; 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack = NULL; 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputText = NULL; 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAltInputText = NULL; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInput = NULL; 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = 0; 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputUniStrMaybeMutable = FALSE; 
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = status; 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// init2() Common initialization for use by RegexMatcher constructors, part 2. 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// This handles the common setup to be done after the Pattern is available. 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::init2(UText *input, UErrorCode &status) { 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = status; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) { 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t)); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fData == NULL) { 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack = new UVector64(status); 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fStack == NULL) { 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(input); 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status); 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = status; 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar BACKSLASH = 0x5c; 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar DOLLARSIGN = 0x24; 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// appendReplacement 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher 
&RegexMatcher::appendReplacement(UnicodeString &dest, 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString &replacement, 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replacementText = UTEXT_INITIALIZER; 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openConstUnicodeString(&replacementText, &replacement, &status); 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &dest, &status); 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendReplacement(&resultText, &replacementText, status); 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replacementText); 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// appendReplacement, UText mode 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::appendReplacement(UText *dest, 
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *replacement, 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_INVALID_STATE; 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy input string from the end of previous match to start of current match 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t destLen = utext_nativeLength(dest); 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatchStart > fAppendPosition) { 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (int32_t)(fMatchStart-fAppendPosition), &status); 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len16; 
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = (int32_t)(fMatchStart-fAppendPosition); 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus); 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChars == NULL) { 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status); 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status); 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(inputChars); 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAppendPosition = fMatchEnd; 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // scan the replacement text, looking for substitutions ($n) and \escapes. 
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: optimize this loop by efficiently scanning for '$' or '\', 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // move entire ranges not containing substitutions. 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(replacement, 0); 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(replacement); 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (c != U_SENTINEL) { 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == BACKSLASH) { 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Backslash Escape. Copy the following char out without further checks. 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: Surrogate pairs don't need any special handling 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The second half wont be a '$' or a '\', and 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will move to the dest normally on the next 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // loop iteration. 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_CURRENT32(replacement); 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == U_SENTINEL) { 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c==0x55/*U*/ || c==0x75/*u*/) { 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have a \udddd or \Udddddddd escape sequence. 
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset = 0; 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement); 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context); 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (escapedChar != (UChar32)0xFFFFFFFF) { 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_IS_BMP(escapedChar)) { 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c16 = (UChar)escapedChar; 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar surrogate[2]; 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[0] = U16_LEAD(escapedChar); 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[1] = U16_TRAIL(escapedChar); 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: Report errors for mal-formed \u escapes? 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // As this is, the original sequence is output, which may be OK. 
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (context.lastOffset == offset) { 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(replacement); 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (context.lastOffset != offset-1) { 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_moveIndex32(replacement, offset - context.lastOffset - 1); 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(replacement); 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Plain backslash escape. Just put out the escaped character. 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_IS_BMP(c)) { 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c16 = (UChar)c; 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar surrogate[2]; 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[0] = U16_LEAD(c); 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[1] = U16_TRAIL(c); 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (c != DOLLARSIGN) { 
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Normal char, not a $. Copy it out without further checks. 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_IS_BMP(c)) { 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c16 = (UChar)c; 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar surrogate[2]; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[0] = U16_LEAD(c); 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org surrogate[1] = U16_TRAIL(c); 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We've got a $. Pick up a capture group number if one follows. 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Consume at most the number of digits necessary for the largest capture 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // number that is valid for this pattern. 
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numDigits = 0; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum = 0; 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 digitC; 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org digitC = UTEXT_CURRENT32(replacement); 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (digitC == U_SENTINEL) { 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_isdigit(digitC) == FALSE) { 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(replacement); 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org groupNum=groupNum*10 + u_charDigitValue(digitC); 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org numDigits++; 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (numDigits >= fPattern->fMaxCaptureDigits) { 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (numDigits == 0) { 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The $ didn't introduce a group number at all. 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Treat it as just part of the substitution text. 
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c16 = DOLLARSIGN; 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Finally, append the capture group data to the destination. 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destLen += appendGroup(groupNum, dest, status); 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Can fail if group number is out of range. 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(replacement); 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// appendTail Intended to be used in conjunction with appendReplacement() 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// To the destination string, append everything following 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// the last match position from the input string. 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note: Match ranges do not affect appendTail or appendReplacement 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) { 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &dest, &status); 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendTail(&resultText, status); 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// appendTail, UText mode 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) { 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool bailOut = FALSE; 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (bailOut) { 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // dest must not be NULL 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, utext_nativeLength(dest), utext_nativeLength(dest), NULL, 0, &status); 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputLength > fAppendPosition) { 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t destLen = utext_nativeLength(dest); 
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (int32_t)(fInputLength-fAppendPosition), &status); 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len16; 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = (int32_t)(fInputLength-fAppendPosition); 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status); 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; // buffer overflow 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16)); 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChars == NULL) { 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t destLen = utext_nativeLength(dest); 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, destLen, destLen, inputChars, len16, &status); 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(inputChars); 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// end 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::end(UErrorCode &err) const { 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return end(0, err); 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::end64(UErrorCode &err) const { 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return end64(0, err); 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const { 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(err)) { 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err = U_REGEX_INVALID_STATE; 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (group < 0 || group > fPattern->fGroupMap->size()) { 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org err = U_INDEX_OUTOFBOUNDS_ERROR; 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t e = -1; 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (group == 0) { 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fMatchEnd; 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Get the position within the stack frame of the variables for 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // this capture group. 
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset < fPattern->fFrameSize); 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset >= 0); 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fFrame->fExtra[groupOffset + 1]; 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return e; 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const { 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)end64(group, err); 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// find() 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::find() { 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start at the position of the last match end. (Will be zero if the 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matcher has been reset.) 
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return findUsingChunk(); 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t startPos = fMatchEnd; 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos==0) { 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = fActiveStart; 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save the position of any previous successful match. 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = fMatchEnd; 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatchStart == fMatchEnd) { 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Previous match had zero length. Move start position up one position 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to avoid sending find() into a loop on zero-length matches. 
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= fActiveLimit) { 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(fInputText); 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fLastMatchEnd >= 0) { 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A previous find() failed to match. Don't try again. 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (without this test, a pattern with a zero-length match 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // could match again at the end of an input string.) 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compute the position in the input string beyond which a match can not begin, because 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the minimum length match would extend past the end of the input. 
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Be aware of possible overflows if making changes here. 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t testStartLimit; 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testStartLimit = fActiveLimit - fPattern->fMinMatchLen; 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos > testStartLimit) { 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For now, let the matcher discover that it can't match on its own 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We don't know how long the match len is in native characters 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org testStartLimit = fActiveLimit; 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(startPos >= 0); 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (fPattern->fStartType) { 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_NO_INFO: 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // No optimization was found. 
6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Try a match at each input position. 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testStartLimit) { 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(fInputText); 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testStartLimit the last time through. 
6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_START: 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matches are only possible at the start of the input string 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (pattern begins with ^ or \A) 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos > fActiveStart) { 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_SET: 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match may start on any char from a pre-computed set. 
7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fPattern->fMinMatchLen > 0); 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t pos; 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = UTEXT_GETNATIVEINDEX(fInputText); 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // c will be -1 (U_SENTINEL) at end of text, in which case we 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip this next block (so we don't have a negative array index) 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and handle end of text in the following block. 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) || 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c>=256 && fPattern->fInitialChars->contains(c)))) { 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, pos); 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testStartLimit) { 
7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = pos; 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_STRING: 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_CHAR: 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match starts on exactly one char. 
7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fPattern->fMinMatchLen > 0); 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 theChar = fPattern->fInitialChar; 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t pos; 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = UTEXT_GETNATIVEINDEX(fInputText); 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == theChar) { 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, pos); 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testStartLimit) { 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = pos; 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if 
(REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_LINE: 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos == fAnchorStart) { 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_PREVIOUS32(fInputText); 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPattern->fFlags & UREGEX_UNIX_LINES) { 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0a) { 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testStartLimit) { 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testStartLimit the last time through. 
8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) { 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(fInputText); 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(startPos, FALSE, fDeferredStatus); 8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, startPos); 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testStartLimit) { 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 
8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = UTEXT_GETNATIVEINDEX(fInputText); 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testStartLimit the last time through. 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 
RegexMatcher::find(int64_t start, UErrorCode &status) { 8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org this->reset(); // Note: Reset() is specified by Java Matcher documentation. 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This will reset the region to be the full input length. 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (start < 0) { 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nativeStart = start; 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = nativeStart; 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return find(); 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// findUsingChunk() -- like find(), but with the advance knowledge that the 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// entire string is available in the UText's chunk buffer. 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::findUsingChunk() { 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start at the position of the last match end. (Will be zero if the 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // matcher has been reset. 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t startPos = (int32_t)fMatchEnd; 8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos==0) { 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos = (int32_t)fActiveStart; 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *inputBuf = fInputText->chunkContents; 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save the position of any previous successful match. 
9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = fMatchEnd; 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatchStart == fMatchEnd) { 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Previous match had zero length. Move start position up one position 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to avoid sending find() into a loop on zero-length matches. 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= fActiveLimit) { 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, startPos, fInputLength); 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fLastMatchEnd >= 0) { 9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A previous find() failed to match. Don't try again. 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (without this test, a pattern with a zero-length match 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // could match again at the end of an input string.) 
9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Compute the position in the input string beyond which a match can not begin, because 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the minimum length match would extend past the end of the input. 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Be aware of possible overflows if making changes here. 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t testLen = (int32_t)(fActiveLimit - fPattern->fMinMatchLen); 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos > testLen) { 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(startPos >= 0); 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (fPattern->fStartType) { 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_NO_INFO: 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // No optimization was found. 
9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Try a match at each input position. 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(startPos, FALSE, fDeferredStatus); 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testLen) { 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, startPos, fActiveLimit); 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testLen the last time through. 
9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_START: 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Matches are only possible at the start of the input string 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (pattern begins with ^ or \A) 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos > fActiveStart) { 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(startPos, FALSE, fDeferredStatus); 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_SET: 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match may start on any char from a pre-computed set. 
9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fPattern->fMinMatchLen > 0); 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos = startPos; 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c<256 && fPattern->fInitialChars8->contains(c)) || 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c>=256 && fPattern->fInitialChars->contains(c))) { 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(pos, FALSE, fDeferredStatus); 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos >= testLen) { 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_STRING: 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_CHAR: 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match starts on exactly one char. 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fPattern->fMinMatchLen > 0); 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 theChar = fPattern->fInitialChar; 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos = startPos; 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == theChar) { 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(pos, FALSE, fDeferredStatus); 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos >= testLen) { 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 
10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case START_LINE: 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos == fAnchorStart) { 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(startPos, FALSE, fDeferredStatus); 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, startPos, fActiveLimit); 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPattern->fFlags & UREGEX_UNIX_LINES) { 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = inputBuf[startPos-1]; 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0a) { 
10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(startPos, FALSE, fDeferredStatus); 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testLen) { 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, startPos, fActiveLimit); 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testLen the last time through. 
10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = inputBuf[startPos-1]; 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) { 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org startPos++; 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt(startPos, FALSE, fDeferredStatus); 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch) { 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startPos >= testLen) { 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 
10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, startPos, fActiveLimit); 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note that it's perfectly OK for a pattern to have a zero-length 10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match at the end of a string, so we must make sure that the loop 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // runs with startPos == testLen the last time through. 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// group() 
11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::group(UErrorCode &status) const { 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return group(0, status); 11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Return immutable shallow clone 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const { 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return group(0, dest, group_len, status); 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Return immutable shallow clone 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const { 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org group_len = 0; 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool bailOut = FALSE; 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_INVALID_STATE; 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (bailOut) { 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (dest) ? dest : utext_openUChars(NULL, NULL, 0, &status); 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t s, e; 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum == 0) { 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fMatchStart; 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fMatchEnd; 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset < fPattern->fFrameSize); 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset >= 0); 11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fFrame->fExtra[groupOffset]; 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fFrame->fExtra[groupOffset+1]; 
11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s < 0) { 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A capture group wasn't part of the match 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_clone(dest, fInputText, FALSE, TRUE, &status); 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(s <= e); 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org group_len = e - s; 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = utext_clone(dest, fInputText, FALSE, TRUE, &status); 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(dest, s); 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString result; 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &result, &status); 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org group(groupNum, &resultText, status); 
11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Return deep (mutable) clone 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Technology Preview (as an API), but note that the UnicodeString API is implemented 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// using this function. 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const { 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool bailOut = FALSE; 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_INVALID_STATE; 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 
12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (bailOut) { 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_openUChars(NULL, NULL, 0, &status); 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t s, e; 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum == 0) { 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fMatchStart; 12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fMatchEnd; 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset < fPattern->fFrameSize); 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset >= 0); 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fFrame->fExtra[groupOffset]; 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fFrame->fExtra[groupOffset+1]; 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s < 0) { 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A capture group wasn't part of the match 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_openUChars(NULL, NULL, 0, &status); 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(s <= e); 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(e <= fInputLength); 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status); 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText groupText = UTEXT_INITIALIZER; 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status); 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status); 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&groupText); 
12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len16; 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = (int32_t)(e-s); 12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); 12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupChars == NULL) { 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(fInputText, s, e, groupChars, len16+1, &status); 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status); 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText groupText = UTEXT_INITIALIZER; 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&groupText, groupChars, len16, &status); 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = utext_clone(NULL, 
&groupText, TRUE, FALSE, &status); 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&groupText); 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(groupChars); 12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// appendGroup() -- currently internal only, appends a group to a UText rather 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// than replacing its contents 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const { 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 
12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t destLen = utext_nativeLength(dest); 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_INVALID_STATE; 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_replace(dest, destLen, destLen, NULL, 0, &status); 13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_replace(dest, destLen, destLen, NULL, 0, &status); 13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t s, e; 13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupNum == 0) { 13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fMatchStart; 13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fMatchEnd; 13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset < fPattern->fFrameSize); 13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset >= 0); 13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fFrame->fExtra[groupOffset]; 13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org e = fFrame->fExtra[groupOffset+1]; 
13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s < 0) { 13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A capture group wasn't part of the match 13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_replace(dest, destLen, destLen, NULL, 0, &status); 13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(s <= e); 13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t deltaLen; 13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(e <= fInputLength); 13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status); 13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len16; 13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = (int32_t)(e-s); 13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); 13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupChars == 
NULL) { 13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(fInputText, s, e, groupChars, len16+1, &status); 13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status); 13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(groupChars); 13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return deltaLen; 13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// groupCount() 13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::groupCount() const { 13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fPattern->fGroupMap->size(); 13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// hasAnchoringBounds() 13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hasAnchoringBounds() const { 13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fAnchoringBounds; 13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// hasTransparentBounds() 13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hasTransparentBounds() const { 13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fTransparentBounds; 13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 
13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// hitEnd() 13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::hitEnd() const { 13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fHitEnd; 13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// input() 13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UnicodeString &RegexMatcher::input() const { 14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!fInput) { 14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len16; 14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = (int32_t)fInputLength; 14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status); 
14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; // overflow, length status 14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString *result = new UnicodeString(len16, 0, 0); 14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *inputChars = result->getBuffer(len16); 14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning 14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result->releaseBuffer(len16); 14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator= 14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *fInput; 14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// inputText() 14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::inputText() const { 14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fInputText; 14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// getInput() -- like inputText(), but makes a clone or copies into another UText 14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const { 14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool bailOut = FALSE; 14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bailOut = TRUE; 14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (bailOut) { 14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 
14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_clone(NULL, fInputText, FALSE, TRUE, &status); 14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest) { 14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status); 14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t input16Len; 14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_USES_U16(fInputText)) { 14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org input16Len = (int32_t)fInputLength; 14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error 14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len)); 14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChars == NULL) { 14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning 14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR; 14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status); 14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(inputChars); 14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return utext_clone(NULL, fInputText, FALSE, TRUE, &status); 14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool compat_SyncMutableUTextContents(UText *ut); 14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool compat_SyncMutableUTextContents(UText *ut) { 14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool retVal = FALSE; 14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In the following test, we're really only interested in whether the UText should switch 14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // between heap and stack allocation. If length hasn't changed, we won't, so the chunkContents 14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will still point to the correct data. 
14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (utext_nativeLength(ut) != ut->nativeIndexingLimit) { 14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString *us=(UnicodeString *)ut->context; 14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Update to the latest length. 14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit). 14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newLength = us->length(); 15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Update the chunk description. 15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The buffer may have switched between stack- and heap-based. 15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ut->chunkContents = us->getBuffer(); 15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ut->chunkLength = newLength; 15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ut->chunkNativeLimit = newLength; 15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ut->nativeIndexingLimit = newLength; 15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = TRUE; 15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retVal; 15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// lookingAt() 
15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::lookingAt(UErrorCode &status) { 15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputUniStrMaybeMutable) { 15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (compat_SyncMutableUTextContents(fInputText)) { 15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resetPreserveRegion(); 15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt((int32_t)fActiveStart, FALSE, status); 15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 
15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(fActiveStart, FALSE, status); 15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) { 15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (start < 0) { 15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputUniStrMaybeMutable) { 15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (compat_SyncMutableUTextContents(fInputText)) { 15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
reset(); 15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nativeStart; 15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nativeStart = start; 15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt((int32_t)nativeStart, FALSE, status); 15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(nativeStart, FALSE, status); 15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// matches() 15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 
15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::matches(UErrorCode &status) { 15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputUniStrMaybeMutable) { 15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (compat_SyncMutableUTextContents(fInputText)) { 16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else { 16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resetPreserveRegion(); 16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt((int32_t)fActiveStart, TRUE, status); 16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 
16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(fActiveStart, TRUE, status); 16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::matches(int64_t start, UErrorCode &status) { 16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (start < 0) { 16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputUniStrMaybeMutable) { 16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (compat_SyncMutableUTextContents(fInputText)) { 16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
reset(); 16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nativeStart; 16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nativeStart = start; 16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchChunkAt((int32_t)nativeStart, TRUE, status); 16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MatchAt(nativeStart, TRUE, status); 16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fMatch; 16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// pattern 16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 
16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst RegexPattern &RegexMatcher::pattern() const { 16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *fPattern; 16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// region 16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) { 16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (regionStart>regionLimit || regionStart<0 || regionLimit<0) { 16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nativeStart = regionStart; 
16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nativeLimit = regionLimit; 16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nativeStart > fInputLength || nativeLimit > fInputLength) { 16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startIndex == -1) 16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org this->reset(); 16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resetPreserveRegion(); 16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionStart = nativeStart; 16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionLimit = nativeLimit; 16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = nativeStart; 16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = nativeLimit; 16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startIndex != -1) { 16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (startIndex < fActiveStart || startIndex > fActiveLimit) { 16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = startIndex; 17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!fTransparentBounds) { 17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookStart = 
nativeStart; 17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookLimit = nativeLimit; 17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fAnchoringBounds) { 17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorStart = nativeStart; 17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorLimit = nativeLimit; 17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) { 17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return region(start, limit, -1, status); 17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// regionEnd 17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::regionEnd() const { 17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)fRegionLimit; 17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::regionEnd64() const { 
17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fRegionLimit; 17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// regionStart 17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::regionStart() const { 17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)fRegionStart; 17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::regionStart64() const { 17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fRegionStart; 17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// replaceAll 17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::replaceAll(const 
UnicodeString &replacement, UErrorCode &status) { 17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replacementText = UTEXT_INITIALIZER; 17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString resultString; 17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return resultString; 17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openConstUnicodeString(&replacementText, &replacement, &status); 17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &resultString, &status); 17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org replaceAll(&replacementText, &resultText, status); 17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replacementText); 17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return resultString; 17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// replaceAll, UText mode 17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode 
&status) { 17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest == NULL) { 17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString emptyString; 17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText empty = UTEXT_INITIALIZER; 17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&empty, &emptyString, &status); 17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&empty); 17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (find()) { 17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendReplacement(dest, replacement, status); 17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendTail(dest, status); 18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// replaceFirst 18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) { 18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText replacementText = UTEXT_INITIALIZER; 18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText resultText = UTEXT_INITIALIZER; 18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString resultString; 18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openConstUnicodeString(&replacementText, &replacement, &status); 18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&resultText, &resultString, &status); 18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org replaceFirst(&replacementText, &resultText, status); 
18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&resultText); 18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&replacementText); 18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return resultString; 18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// replaceFirst, UText mode 18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) { 18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!find()) { 18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return getInput(dest, status); 18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest == NULL) { 
18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString emptyString; 18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText empty = UTEXT_INITIALIZER; 18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUnicodeString(&empty, &emptyString, &status); 18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&empty); 18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendReplacement(dest, replacement, status); 18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org appendTail(dest, status); 18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return dest; 18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// requireEnd 18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::requireEnd() const { 18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fRequireEnd; 18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// reset 18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset() { 18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionStart = 0; 18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRegionLimit = fInputLength; 18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = 0; 18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fInputLength; 18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorStart = 0; 18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorLimit = fInputLength; 18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookStart = 0; 18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookLimit = fInputLength; 18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org resetPreserveRegion(); 18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::resetPreserveRegion() { 18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchStart = 0; 
18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = 0; 18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = -1; 18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAppendPosition = 0; 18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = FALSE; 18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = FALSE; 18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = FALSE; 18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTime = 0; 19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTickCounter = TIMER_INITIAL_VALUE; 19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org //resetStack(); // more expensive than it looks... 19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(const UnicodeString &input) { 19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus); 19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPattern->fNeedsAltInput) { 19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fInput; 19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInput = NULL; 19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Do the following for any UnicodeString. 19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is for compatibility for those clients who modify the input string "live" during regex operations. 19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputUniStrMaybeMutable = TRUE; 19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fWordBreakItr != NULL) { 19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0 19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fWordBreakItr->setText(fInputText, status); 19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(UText *input) { 19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fInputText != input) { 19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus); 19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputLength = utext_nativeLength(fInputText); 19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fInput; 
19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInput = NULL; 19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fWordBreakItr != NULL) { 19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0 19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fWordBreakItr->setText(input, status); 19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputUniStrMaybeMutable = FALSE; 19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RegexMatcher &RegexMatcher::reset(const UChar *) { 19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fDeferredStatus = U_INTERNAL_PROGRAM_ERROR; 19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) { 19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
reset(); // Reset also resets the region to be the entire string. 19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (position < 0 || position > fActiveLimit) { 19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = position; 19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// refresh 19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) { 19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (input == NULL) { 19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (utext_nativeLength(fInputText) != utext_nativeLength(input)) { 19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t pos = utext_getNativeIndex(fInputText); 19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Shallow read-only clone of the new UText into the existing input UText 19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status); 19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(fInputText, pos); 19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fAltInputText != NULL) { 19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = utext_getNativeIndex(fAltInputText); 19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status); 20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(fAltInputText, pos); 20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// setTrace 20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setTrace(UBool state) { 20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTraceDebug = state; 20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------- 20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// split 20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//--------------------------------------------------------------------- 20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::split(const UnicodeString &input, 20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString dest[], 20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t destCapacity, 20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) 
20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText inputText = UTEXT_INITIALIZER; 20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openConstUnicodeString(&inputText, &input, &status); 20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity); 20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (destText == NULL) { 20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i = 0; i < destCapacity; i++) { 20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destText[i] = utext_openUnicodeString(NULL, &dest[i], &status); 20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t fieldCount = split(&inputText, destText, destCapacity, status); 20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i = 0; i < destCapacity; i++) { 20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(destText[i]); 20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(destText); 20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&inputText); 20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fieldCount; 20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// split, UText mode 20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::split(UText *input, 20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText *dest[], 20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t destCapacity, 20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) 20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check arguements for validity 20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (destCapacity < 1) { 20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 
20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Reset for the input text 20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(input); 20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t nextOutputStringStart = 0; 20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fActiveLimit == 0) { 20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop through the input text, searching for the delimiter pattern 20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numCaptureGroups = fPattern->fGroupMap->size(); 20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; ; i++) { 20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i>=destCapacity-1) { 20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is one or zero output string left. 20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fill the last output string with whatever is left from the input, then exit the loop. 20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ( i will be == destCapacity if we filled the output array while processing 20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // capture groups of the delimiter expression, in which case we will discard the 20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // last capture group saved in favor of the unprocessed remainder of the 20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // input string.) 
21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i = destCapacity-1; 21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fActiveLimit > nextOutputStringStart) { 21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org input->chunkContents+nextOutputStringStart, 21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (int32_t)(fActiveLimit-nextOutputStringStart), &status); 21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit-nextOutputStringStart, &status); 21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t remaining16Length = 21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 
21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (remainingChars == NULL) { 21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(remainingChars); 21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (find()) { 21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We found another delimiter. 
Move everything from where we started looking 21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // up until the start of the delimiter into the next output string. 21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org input->chunkContents+nextOutputStringStart, 21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (int32_t)(fMatchStart-nextOutputStringStart), &status); 21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchStart-nextOutputStringStart, &status); 21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus); 21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (remainingChars == NULL) { 
21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(input, nextOutputStringStart, fMatchStart, remainingChars, remaining16Length+1, &status); 21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(remainingChars); 21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextOutputStringStart = fMatchEnd; 21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the delimiter pattern has capturing parentheses, the captured 21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // text goes out into the next n destination strings. 
21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupNum; 21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { 21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i >= destCapacity-2) { 21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Never fill the last available output string with capture group text. 21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It will filled with the last field, the remainder of the 21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unsplit input text. 21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i++; 21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = group(groupNum, dest[i], status); 21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nextOutputStringStart == fActiveLimit) { 21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The delimiter was at the end of the string. We're done, but first 21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we output one last empty string, for the empty field following 21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the delimiter at the end of input. 
21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i+1 < destCapacity) { 21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i] == NULL) { 21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_openUChars(NULL, NULL, 0, &status); 21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static UChar emptyString[] = {(UChar)0}; 22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status); 22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We ran off the end of the input while looking for the next delimiter. 22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // All the remaining text goes into the current output string. 
22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org input->chunkContents+nextOutputStringStart, 22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (int32_t)(fActiveLimit-nextOutputStringStart), &status); 22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit-nextOutputStringStart, &status); 22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode lengthStatus = U_ZERO_ERROR; 22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (remainingChars == NULL) { 22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (dest[i]) { 22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UText remainingText = UTEXT_INITIALIZER; 22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_close(&remainingText); 22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(remainingChars); 22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } // end of for loop 22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return i+1; 22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// start 22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::start(UErrorCode &status) const { 22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return start(0, status); 22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::start64(UErrorCode &status) const { 22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return start64(0, status); 22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// start(int32_t group, UErrorCode &status) 22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const { 22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if 
(U_FAILURE(status)) { 22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fMatch == FALSE) { 22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_INVALID_STATE; 22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (group < 0 || group > fPattern->fGroupMap->size()) { 22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INDEX_OUTOFBOUNDS_ERROR; 22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t s; 22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (group == 0) { 22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fMatchStart; 22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset < fPattern->fFrameSize); 22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupOffset >= 0); 22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org s = fFrame->fExtra[groupOffset]; 22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return s; 23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const { 23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)start64(group, status); 23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// useAnchoringBounds 23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) { 23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchoringBounds = b; 23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorStart = (fAnchoringBounds ? fRegionStart : 0); 23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fAnchorLimit = (fAnchoringBounds ? 
fRegionLimit : fInputLength); 23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// useTransparentBounds 23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) { 23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTransparentBounds = b; 23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookStart = (fTransparentBounds ? 0 : fRegionStart); 23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLookLimit = (fTransparentBounds ? 
fInputLength : fRegionLimit); 23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// setTimeLimit 23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) { 23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (limit < 0) { 23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTimeLimit = limit; 23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// getTimeLimit 23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::getTimeLimit() const { 23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fTimeLimit; 23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// setStackLimit 23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) { 23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = fDeferredStatus; 23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 
23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (limit < 0) { 23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Reset the matcher. This is needed here in case there is a current match 23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // whose final stack frame (containing the match results, pointed to by fFrame) 23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // would be lost by resizing to a smaller stack size. 23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (limit == 0) { 23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Unlimited stack expansion 23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setMaxCapacity(0); 23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Change the units of the limit from bytes to ints, and bump the size up 23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to be big enough to hold at least one stack frame for the pattern, 23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // if it isn't there already. 
23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t adjustedLimit = limit / sizeof(int32_t); 23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (adjustedLimit < fPattern->fFrameSize) { 23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org adjustedLimit = fPattern->fFrameSize; 23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setMaxCapacity(adjustedLimit); 23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStackLimit = limit; 24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// getStackLimit 24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t RegexMatcher::getStackLimit() const { 24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fStackLimit; 24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// setMatchCallback 
24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setMatchCallback(URegexMatchCallback *callback, 24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *context, 24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCallbackFn = callback; 24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fCallbackContext = context; 24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// getMatchCallback 24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::getMatchCallback(URegexMatchCallback *&callback, 24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *&context, 24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 
24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callback = fCallbackFn; 24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org context = fCallbackContext; 24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// setMatchCallback 24486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback *callback, 24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *context, 24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFindProgressCallbackFn = callback; 24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFindProgressCallbackContext = context; 24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
24616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// getMatchCallback 24646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback *&callback, 24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const void *&context, 24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callback = fFindProgressCallbackFn; 24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org context = fFindProgressCallbackContext; 24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//================================================================================ 24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Code following this point in this file is the internal 24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Match Engine Implementation. 
24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//================================================================================ 24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// resetStack 24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Discard any previous contents of the state save stack, and initialize a 24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// new stack frame to all -1. The -1s are needed for capture group limits, 24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// where they indicate that a group has not yet matched anything. 24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgREStackFrame *RegexMatcher::resetStack() { 24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Discard any previous contents of the state save stack, and initialize a 24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // new stack frame with all -1 data. The -1s are needed for capture group limits, 24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // where they indicate that a group has not yet matched anything. 
24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->removeAllElements(); 24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus); 24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) { 25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org iFrame->fExtra[i] = -1; 25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return iFrame; 25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// isWordBoundary 25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// in perl, "xab..cd..", \b is true at positions 0,3,5,7 25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// For us, 25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// If the current char is a combining mark, 25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// \b is FALSE. 25156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Else Scan backwards to the first non-combining char. 
25166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// We are at a boundary if the this char and the original chars are 25176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// opposite in membership in \w set 25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// parameters: pos - the current position in the input buffer 25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// TODO: double-check edge cases at region boundaries. 25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isWordBoundary(int64_t pos) { 25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isBoundary = FALSE; 25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool cIsWord = FALSE; 25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos >= fLookLimit) { 25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Determine whether char c at current position is a member of the word set of chars. 25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we're off the end of the string, behave as though we're not at a word char. 
25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, pos); 25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_CURRENT32(fInputText); 25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Current char is a combining one. Not a boundary. 25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up until we come to a non-combining char, determine whether 25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that char is a word char. 
25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool prevCIsWord = FALSE; 25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) { 25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 prevChar = UTEXT_PREVIOUS32(fInputText); 25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org || u_charType(prevChar) == U_FORMAT_CHAR)) { 25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isBoundary = cIsWord ^ prevCIsWord; 25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return isBoundary; 25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isChunkWordBoundary(int32_t pos) { 25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isBoundary = FALSE; 25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool cIsWord = FALSE; 25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *inputBuf = fInputText->chunkContents; 25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos >= fLookLimit) { 
25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Determine whether char c at current position is a member of the word set of chars. 25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we're off the end of the string, behave as though we're not at a word char. 25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_GET(inputBuf, fLookStart, pos, fLookLimit, c); 25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Current char is a combining one. Not a boundary. 25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up until we come to a non-combining char, determine whether 25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that char is a word char. 
25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool prevCIsWord = FALSE; 25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos <= fLookStart) { 25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 prevChar; 25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_PREV(inputBuf, fLookStart, pos, prevChar); 25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org || u_charType(prevChar) == U_FORMAT_CHAR)) { 25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isBoundary = cIsWord ^ prevCIsWord; 25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return isBoundary; 25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// isUWordBoundary 26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Test for a word boundary using RBBI word break. 
26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// parameters: pos - the current position in the input buffer 26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::isUWordBoundary(int64_t pos) { 26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool returnVal = FALSE; 26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_BREAK_ITERATION==0 26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we haven't yet created a break iterator for this matcher, do it now. 26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fWordBreakItr == NULL) { 26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fWordBreakItr = 26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus); 26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(fDeferredStatus)) { 26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fWordBreakItr->setText(fInputText, fDeferredStatus); 26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pos >= fLookLimit) { 26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org returnVal = TRUE; // With Unicode word rules, only positions within the 
interior of "real" 26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // words are not boundaries. All non-word chars stand by themselves, 26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // with word boundaries on both sides. 26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!UTEXT_USES_U16(fInputText)) { 26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // !!!: Would like a better way to do this! 26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = utext_extract(fInputText, 0, pos, NULL, 0, &status); 26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org returnVal = fWordBreakItr->isBoundary((int32_t)pos); 26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return returnVal; 26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// IncrementTime This function is called once each TIMER_INITIAL_VALUE state 26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// saves. Increment the "time" counter, and call the 26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// user callback function if there is one installed. 
26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// If the match operation needs to be aborted, either for a time-out 26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// or because the user callback asked for it, just set an error status. 26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The engine will pick that up and stop in its outer loop. 26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::IncrementTime(UErrorCode &status) { 26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTickCounter = TIMER_INITIAL_VALUE; 26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTime++; 26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fCallbackFn != NULL) { 26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) { 26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_STOPPED_BY_CALLER; 26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTimeLimit > 0 && fTime >= fTimeLimit) { 26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_TIME_OUT; 26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 
26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// ReportFindProgress This function is called once for each advance in the target 26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// string from the find() function, and calls the user progress callback 26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// function if there is one installed. 26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// NOTE: 26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// If the match operation needs to be aborted because the user 26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// callback asked for it, just set an error status. 26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The engine will pick that up and stop in its outer loop. 26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) { 26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fFindProgressCallbackFn != NULL) { 26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) { 26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/; 26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 
26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// StateSave 26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Make a new stack frame, initialized as a copy of the current stack frame. 26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Set the pattern index in the original stack frame from the operand value 26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// in the opcode. Execution of the engine continues with the state in 26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// the newly created stack frame 26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Note that reserveBlock() may grow the stack, resulting in the 26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// whole thing being relocated in memory. 26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Parameters: 26996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// fp The top frame pointer when called. At return, a new 27006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// fame will be present 27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// savePatIdx An index into the compiled pattern. Goes into the original 27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// (not new) frame. If execution ever back-tracks out of the 27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// new frame, this will be where we continue from in the pattern. 
27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Return 27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// The new frame pointer. 27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) { 27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // push storage for a new frame. 27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *newFP = fStack->reserveBlock(fFrameSize, status); 27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newFP == NULL) { 27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Failure on attempted stack expansion. 27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Stack function set some other error code, change it to a more 27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // specific one for regular expressions. 27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_REGEX_STACK_OVERFLOW; 27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We need to return a writable stack frame, so just return the 27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // previous frame. The match operation will stop quickly 27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // because of the error status, after which the frame will never 27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // be looked at again. 27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fp; 27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)(newFP - fFrameSize); // in case of realloc of stack. 
27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // New stack frame = copy of old top frame. 27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *source = (int64_t *)fp; 27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *dest = newFP; 27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *dest++ = *source++; 27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (source == newFP) { 27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fTickCounter--; 27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTickCounter <= 0) { 27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org IncrementTime(status); // Re-initializes fTickCounter 27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = savePatIdx; 27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (REStackFrame *)newFP; 27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 27446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// MatchAt This is the actual matching engine. 
27466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// startIdx: begin matching a this index. 27486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// toEnd: if true, match must extend to end of the input region 27496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 27506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { 27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isMatch = FALSE; // True if the we have a match. 27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards 27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t op; // Operation from the compiled pattern, split into 27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t opType; // the opcode 27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t opValue; // and the operand value. 
27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #ifdef REGEX_RUN_DEBUG 27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) 27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("MatchAt(startIdx=%ld)\n", startIdx); 27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Original Pattern: "); 27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = utext_next32From(fPattern->fPattern, 0); 27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (c != U_SENTINEL) { 27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<32 || c>256) { 27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = '.'; 27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fPattern->fPattern); 27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Input String: "); 27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = utext_next32From(fInputText, 0); 27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (c != U_SENTINEL) { 27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<32 || c>256) { 27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = '.'; 27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("%c", c); 27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #endif 27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Cache frequently referenced items from the compiled pattern 27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pat = fPattern->fCompiledPat->getBuffer(); 27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *litText = fPattern->fLiteralText.getBuffer(); 27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector *sets = fPattern->fSets; 28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrameSize = fPattern->fFrameSize; 28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REStackFrame *fp = resetStack(); 28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = 0; 28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = startIdx; 28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Zero out the 
pattern's static data 28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i = 0; i<fPattern->fDataSize; i++) { 28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[i] = 0; 28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Main loop for interpreting the compiled pattern. 28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // One iteration of the loop per pattern operation performed. 28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (_heapchk() != _HEAPOK) { 28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "Heap Trouble\n"); 28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; 28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue = URX_VAL(op); 28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #ifdef REGEX_RUN_DEBUG 28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx, 
28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern->dumpOp(fp->fPatIdx); 28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org #endif 28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (opType) { 28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_NOP: 28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKTRACK: 28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Force a backtrack. In some circumstances, the pattern compiler 28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will notice that the pattern can't possibly match anything, and will 28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // emit one of these at that point. 
28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_ONECHAR: 28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fActiveLimit) { 28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == opValue) { 28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STRING: 28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input against a literal string. 
28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Strings require two slots in the compiled pattern, one for the 28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // offset to the string text, and one for the length. 28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stringStartIdx = opValue; 28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stringLen = URX_VAL(op); 28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opType == URX_STRING_LEN); 28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stringLen >= 2); 28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *patternString = litText+stringStartIdx; 28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternStringIndex = 0; 28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 inputChar; 28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 patternChar; 28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (patternStringIndex < stringLen) { 28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) { 28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 
28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputChar = UTEXT_NEXT32(fInputText); 28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(patternString, patternStringIndex, stringLen, patternChar); 28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (patternChar != inputChar) { 28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STATE_SAVE: 29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, opValue, status); 29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_END: 
29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The match loop will exit via this path on a successful match, 29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // when we reach the end of the pattern. 29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (toEnd && fp->fInputIdx != fActiveLimit) { 29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The pattern matched, but not to the end of input. Try some more. 29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = TRUE; 29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto breakFromLoop; 29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start and End Capture stack frame variables are laid out out like this: 29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // fp->fExtra[opValue] - The start of a completed capture group 29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // opValue+1 - The end of a completed capture group 29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // opValue+2 - the start of a capture group whose end 29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // has not yet been reached (and might not ever be). 
29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_START_CAPTURE: 29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+2] = fp->fInputIdx; 29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_END_CAPTURE: 29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // End position 29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR: // $, test for End of line 29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // or for position before new line at end of input 29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 
29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are positioned just before a new-line that is located at the 29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // end of input, succeed. 29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) { 29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If not in the middle of a CR/LF sequence 29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) { 29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At new-line at end of input. 
Success 29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 nextC = UTEXT_NEXT32(fInputText); 29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; // At CR/LF at end of input. Success 29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Off the end of input. Success. 
29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Either at the last character of input, or off the end. 29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) { 29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at end of input. Back-track out. 30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_M: // $, test for End of line in multi-line mode 30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 
30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are positioned just before a new-line, succeed. 30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It makes no difference where the new-line is within the input. 30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_CURRENT32(fInputText); 30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { 30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In multi-line mode, hitting a new-line just before the end of input does not 30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // set the hitEnd or requireEnd flags 30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) { 30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // not at a new line. Fail. 
30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; // Java set requireEnd in this case, even though 30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; // adding a new-line would not lose the match. 30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are not positioned just before a new-line, the test fails; backtrack out. 30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It makes no difference where the new-line is within the input. 
30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (UTEXT_CURRENT32(fInputText) != 0x0a) { 30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET: // ^, test for start of line 30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fAnchorStart) { 30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET_M: // ^, test for start of line in mulit-line mode 30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == fAnchorStart) { 30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are at the start input. Success. 
30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether character just before the current pos is a new-line 30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unless we are at the end of input 30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_PREVIOUS32(fInputText); 30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((fp->fInputIdx < fAnchorLimit) && 30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It's a new-line. ^ is true. Success. 30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: what should be done with positions between a CR and LF? 30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at the start of a line. Fail. 
30766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 30826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 30836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx >= fAnchorStart); 30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx <= fAnchorStart) { 30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are at the start input. Success. 30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether character just before the current pos is a new-line 30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx <= fAnchorLimit); 30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_PREVIOUS32(fInputText); 30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c != 0x0a) { 30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at the start of a line. Back-track out. 
30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_B: // Test for word boundaries 31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = isWordBoundary(fp->fInputIdx); 31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \B 31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = isUWordBoundary(fp->fInputIdx); 31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \B 31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_D: // Test for decimal digit 31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 
31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \D 31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_G: // Test for position at end of previous match 31456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 31466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_X: 31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match a Grapheme, as defined by Unicode TR 29. 31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Differs slightly from Perl, which consumes combining marks independently 31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of context. 
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fail if at end of input 31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Examine (and consume) the current char. 31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Dispatch into a little state machine, based on the char. 
31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet **sets = fPattern->fStaticSets; 31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_L]->contains(c)) goto GC_L; 31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LV]->contains(c)) goto GC_V; 31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) goto GC_T; 31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_L: 31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_L]->contains(c)) goto GC_L; 31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LV]->contains(c)) 
goto GC_V; 31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(fInputText); 31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_V: 31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) goto GC_T; 32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(fInputText); 32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_T: 32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) 
goto GC_T; 32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(fInputText); 32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Extend: 32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Combining characters are consumed here 32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_CURRENT32(fInputText); 32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(fInputText); 32256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Done; 32286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Control: 32306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Most control chars stand alone (don't combine with combining chars), 32316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // except for that CR/LF sequence is a single grapheme cluster. 
32326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 32336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 32346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Done: 32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_Z: // Test for end of Input 32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fAnchorLimit) { 32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STATIC_SETREF: 32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input character against one of the predefined sets 32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Word Characters, for example) 32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The high bit of the op value is a flag for the match polarity. 32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 0: success if input char is in set. 32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 1: success if input char is not in set. 32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue &= ~URX_NEG_SET; 32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c < 256) { 32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = 
&fPattern->fStaticSets8[opValue]; 32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c)) { 32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = !success; 32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *s = fPattern->fStaticSets[opValue]; 32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c)) { 32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = !success; 32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the character wasn't in the set. 
32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STAT_SETREF_N: 32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input character for NOT being a member of one of 33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the predefined sets (Word Characters, for example) 33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c < 256) { 33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 
33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c) == FALSE) { 33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *s = fPattern->fStaticSets[opValue]; 33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c) == FALSE) { 33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the character wasn't in the set. 
33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_SETREF: 33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Pick up one char and test it for set membership. 
33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < sets->size()); 33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<256) { 33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c)) { 33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c)) { 33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The character is in the set. A Match. 33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the character wasn't in the set. 
33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY: 33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // . matches anything, but stops at end-of-line. 33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Advance over one char, unless we've hit end-of-line 33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of line in normal mode. . 
does not match. 33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY_ALL: 33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // ., in dot-matches-all (including new lines) mode 33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Advance over one char, except if we are 34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at a cr/lf, advance over both of them. 
34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c==0x0d && fp->fInputIdx < fActiveLimit) { 34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In the case of a CR/LF, we need to advance over both. 34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 nextc = UTEXT_CURRENT32(fInputText); 34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (nextc == 0x0a) { 34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_NEXT32(fInputText); 34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY_UNIX: 34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // '.' operator, matches all, but stops at end-of-line. 34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UNIX_LINES mode, so 0x0a is the only recognized line ending. 34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 
34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Advance over one char, unless we've hit end-of-line 34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0a) { 34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of line in normal mode. '.' does not match the \n 34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP: 34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; 34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_FAIL: 
34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = FALSE; 34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto breakFromLoop; 34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP_SAV: 34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < fPattern->fCompiledPat->size()); 34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; // Then JMP. 34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP_SAV_X: 34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This opcode is used with (x)+, when x can match a zero length string. 34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Same as JMP_SAV, except conditional on the match having made forward progress. 34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // data address of the input position at the start of the loop. 
34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stoOp = (int32_t)pat[opValue-1]; 34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t frameLoc = URX_VAL(stoOp); 34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t prevInputIdx = fp->fExtra[frameLoc]; 34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(prevInputIdx <= fp->fInputIdx); 34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (prevInputIdx < fp->fInputIdx) { 34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The match did make progress. Repeat the loop. 34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; 34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[frameLoc] = fp->fInputIdx; 34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the input position did not advance, we do nothing here, 34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // execution will fall out of the loop. 
34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_INIT: 34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pick up the three extra operands that CTR_INIT has, and 34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the pattern location counter past 34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 3; 34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minCount>=0); 34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(maxCount>=minCount || maxCount==-1); 34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(loopLoc>=fp->fPatIdx); 34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (minCount == 0) { 34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, loopLoc+1, status); 34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking. 35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (maxCount == 0) { 35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_LOOP: 35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t initOp = (int32_t)pat[opValue]; 35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[opValue+2]; 35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[opValue+3]; 35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*pCounter)++; 35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(*pCounter == maxCount); 35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*pCounter >= minCount) { 
35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop has no hard upper bound. 35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check that it is progressing through the input, break if it is not. 35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == *pLastInputIdx) { 35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pLastInputIdx = fp->fInputIdx; 35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue + 4; // Loop back. 
35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_INIT_NG: 35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Initialize a non-greedy loop 35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pick up the three extra operands that CTR_INIT_NG has, and 35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the pattern location counter past 35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 3; 35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minCount>=0); 35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(maxCount>=minCount || maxCount==-1); 35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(loopLoc>fp->fPatIdx); 35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for 
loop breaking. 35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (minCount == 0) { 35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount != 0) { 35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_LOOP_NG: 35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Non-greedy {min, max} loops 35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t initOp = (int32_t)pat[opValue]; 35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[opValue+2]; 35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[opValue+3]; 35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*pCounter)++; 35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((uint64_t)*pCounter >= (uint32_t)maxCount && 
maxCount != -1) { 35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop has matched the maximum permitted number of times. 35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Break out of here with no action. Matching will 35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // continue with the following pattern. 35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(*pCounter == maxCount); 35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*pCounter < minCount) { 35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We haven't met the minimum number of matches yet. 35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop back for another one. 35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue + 4; // Loop back. 35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We do have the minimum number of matches. 35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there is no upper bound on the loop iterations, check that the input index 35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // is progressing, and stop the loop if it is not. 
35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == *pLastInputIdx) { 35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pLastInputIdx = fp->fInputIdx; 36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Continuation: we will fall into the pattern following the loop 36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (non-greedy, don't execute loop body first), but first do 36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a state save to the top of the loop, so that a match failure 36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in the following pattern will try another iteration of the loop. 
36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, opValue + 4, status); 36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STO_SP: 36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LD_SP: 36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize = (int32_t)fData[opValue]; 36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(newStackSize <= fStack->size()); 36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newFP == (int64_t *)fp) { 36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<fFrameSize; i++) { 36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newFP[i] = ((int64_t *)fp)[i]; 36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
fp = (REStackFrame *)newFP; 36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKREF: 36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < fFrameSize); 36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupStartIdx = fp->fExtra[opValue]; 36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupEndIdx = fp->fExtra[opValue+1]; 36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupStartIdx <= groupEndIdx); 36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupStartIdx < 0) { 36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This capture group has not participated in the match thus far, 36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx); 36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: if the capture group match was of an empty string the backref 36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match succeeds. Verified by testing: Perl matches succeed 36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in this case, so we do too. 
36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 36546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 36556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (utext_getNativeIndex(fAltInputText) >= groupEndIdx) { 36566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = TRUE; 36576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (utext_getNativeIndex(fInputText) >= fActiveLimit) { 36606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 36616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 36626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 captureGroupChar = utext_next32(fAltInputText); 36656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 inputChar = utext_next32(fInputText); 36666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChar != captureGroupChar) { 36676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 36686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 36736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 36746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 36756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 
36766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKREF_I: 36836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 36846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < fFrameSize); 36856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupStartIdx = fp->fExtra[opValue]; 36866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupEndIdx = fp->fExtra[opValue+1]; 36876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupStartIdx <= groupEndIdx); 36886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupStartIdx < 0) { 36896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This capture group has not participated in the match thus far, 36906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 
36916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(fAltInputText, groupStartIdx); 36946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utext_setNativeIndex(fInputText, fp->fInputIdx); 36956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUTextIterator captureGroupItr(*fAltInputText); 36966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUTextIterator inputItr(*fInputText); 36976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: if the capture group match was of an empty string the backref 36996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match succeeds. Verified by testing: Perl matches succeed 37006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in this case, so we do too. 37016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 37036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 37046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!captureGroupItr.inExpansion() && utext_getNativeIndex(fAltInputText) >= groupEndIdx) { 37056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = TRUE; 37066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!inputItr.inExpansion() && utext_getNativeIndex(fInputText) >= fActiveLimit) { 37096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 37106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 37116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
37126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 captureGroupChar = captureGroupItr.next(); 37146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 inputChar = inputItr.next(); 37156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChar != captureGroupChar) { 37166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 37176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success && inputItr.inExpansion()) { 37226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We otained a match by consuming part of a string obtained from 37236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // case-folding a single code point of the input text. 37246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This does not count as an overall match. 
37256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 37266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 37296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 37306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 37316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 37326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STO_INP_LOC: 37386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 37396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize); 37406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = fp->fInputIdx; 37416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMPX: 37456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 37466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 37476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 1; 37486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 37496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(dataLoc >= 0 && dataLoc < 
fFrameSize); 37506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t savedInputIdx = fp->fExtra[dataLoc]; 37516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(savedInputIdx <= fp->fInputIdx); 37526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (savedInputIdx < fp->fInputIdx) { 37536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; // JMP 37546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 37556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 37566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LA_START: 37616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 37626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Entering a lookahead block. 37636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save Stack Ptr, Input Pos. 37646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 37656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 37666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+1] = fp->fInputIdx; 37676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = fLookStart; // Set the match region change for 37686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fLookLimit; // transparent bounds. 
37696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 37716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LA_END: 37736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 37746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Leaving a look-ahead block. 37756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // restore Stack Ptr, Input Pos to positions they had on entry to block. 37766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 37776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackSize = fStack->size(); 37786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize =(int32_t)fData[opValue]; 37796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackSize >= newStackSize); 37806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (stackSize > newStackSize) { 37816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy the current top frame back to the new (cut back) top frame. 37826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This makes the capture groups from within the look-ahead 37836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // expression available. 
37846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 37856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 37866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<fFrameSize; i++) { 37876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newFP[i] = ((int64_t *)fp)[i]; 37886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)newFP; 37906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 37916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = fData[opValue+1]; 37936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 37946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore the active region bounds in the input string; they may have 37956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // been changed because of transparent bounds on a Region. 37966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = fRegionStart; 37976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fRegionLimit; 37986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 37996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_ONECHAR_I: 38026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Case insensitive one char. The char from the pattern is already case folded. 38036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Input text is not, but case folding the input can not reduce two or more code 38046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // points to one. 
38056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fActiveLimit) { 38066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 38076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 38096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 38106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 38116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 38146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 38156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 38186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STRING_I: 38216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 38226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Case-insensitive test input against a literal string. 38236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Strings require two slots in the compiled pattern, one for the 38246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // offset to the string text, and one for the length. 38256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The compiled string has already been case folded. 
38266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 38276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *patternString = litText + opValue; 38286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternStringIdx = 0; 38296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; 38316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 38326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 38336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue = URX_VAL(op); 38346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opType == URX_STRING_LEN); 38356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternStringLen = opValue; // Length of the string from the pattern. 38366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cPattern; 38396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cText; 38406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 38416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 38436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUTextIterator inputIterator(*fInputText); 38446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (patternStringIdx < patternStringLen) { 38456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!inputIterator.inExpansion() && UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) { 38466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 38476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 
38486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern); 38516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cText = inputIterator.next(); 38526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cText != cPattern) { 38536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 38546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputIterator.inExpansion()) { 38586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 38596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 38626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 38636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 38646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 38656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_START: 38716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 38726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Entering a look-behind block. 
38736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save Stack Ptr, Input Pos. 38746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: implement transparent bounds. Ticket #6067 38756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 38766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 38776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+1] = fp->fInputIdx; 38786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Init the variable containing the start index for attempted matches. 38796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+2] = -1; 38806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save input string length, then reset to pin any matches to end at 38816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the current position. 38826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+3] = fActiveLimit; 38836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fp->fInputIdx; 38846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 38856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 38866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_CONT: 38896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 38906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Positive Look-Behind, at top of loop checking for matches of LB expression 38916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at all possible input starting positions. 38926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 38936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch the min and max possible match lengths. 
They are the operands 38946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of this op in the pattern. 38956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minML = (int32_t)pat[fp->fPatIdx++]; 38966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 38976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML <= maxML); 38986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML >= 0); 38996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch (from data) the last input index where a match was attempted. 39016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 39026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *lbStartIdx = &fData[opValue+2]; 39036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0) { 39046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // First time through loop. 39056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = fp->fInputIdx - minML; 39066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2nd through nth time through the loop. 39086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up start position for match by one. 
39096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx == 0) { 39106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*lbStartIdx)--; 39116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 39136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(fInputText); 39146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 39156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 39196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have tried all potential match starting points without 39206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getting a match. Backtrack out, and out of the 39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look Behind altogether. 
39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t restoreInputLen = fData[opValue+3]; 39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen >= fActiveLimit); 39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen <= fInputLength); 39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = restoreInputLen; 39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (successful match will fall off the end of the loop.) 39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-3, status); 39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = *lbStartIdx; 39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_END: 39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of a look-behind block, after a successful match. 
39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fActiveLimit) { 39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The look-behind expression matched, but the match did not 39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // extend all the way to the point that we are looking behind from. 39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL out of here, which will take us back to the LB_CONT, which 39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will retry the match starting at another position or fail 39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the look-behind altogether, whichever is appropriate. 39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look-behind match is good. Restore the orignal input string length, 39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // which had been truncated to pin the end of the lookbehind match to the 39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // position being looked-behind. 
39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t originalInputLen = fData[opValue+3]; 39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen >= fActiveLimit); 39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen <= fInputLength); 39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = originalInputLen; 39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LBN_CONT: 39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Negative Look-Behind, at top of loop checking for matches of LB expression 39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at all possible input starting positions. 39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch the extra parameters of this op. 
39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minML = (int32_t)pat[fp->fPatIdx++]; 39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continueLoc = URX_VAL(continueLoc); 39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML <= maxML); 39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML >= 0); 39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(continueLoc > fp->fPatIdx); 39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch (from data) the last input index where a match was attempted. 39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *lbStartIdx = &fData[opValue+2]; 39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0) { 39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // First time through loop. 39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = fp->fInputIdx - minML; 39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2nd through nth time through the loop. 39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up start position for match by one. 
39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx == 0) { 39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*lbStartIdx)--; 39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (void)UTEXT_PREVIOUS32(fInputText); 39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have tried all potential match starting points without 39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getting a match, which means that the negative lookbehind as 39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a whole has succeeded. 
Jump forward to the continue location 39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t restoreInputLen = fData[opValue+3]; 39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen >= fActiveLimit); 40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen <= fInputLength); 40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = restoreInputLen; 40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = continueLoc; 40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (successful match will cause a FAIL out of the loop altogether.) 40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-4, status); 40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = *lbStartIdx; 40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LBN_END: 40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of a negative look-behind block, after a successful match. 
40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fActiveLimit) { 40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The look-behind expression matched, but the match did not 40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // extend all the way to the point that we are looking behind from. 40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL out of here, which will take us back to the LB_CONT, which 40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will retry the match starting at another position or succeed 40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the look-behind altogether, whichever is appropriate. 40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look-behind expression matched, which means look-behind test as 40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a whole Fails 40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore the orignal input string length, which had been truncated 40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // inorder to pin the end of the lookbehind match 40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to the position being looked-behind. 
40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t originalInputLen = fData[opValue+3]; 40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen >= fActiveLimit); 40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen <= fInputLength); 40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = originalInputLen; 40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore original stack position, discarding any state saved 40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // by the successful pattern match. 40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize = (int32_t)fData[opValue]; 40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fStack->size() > newStackSize); 40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL, which will take control back to someplace 40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // prior to entering the look-behind test. 
40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_SR_I: 40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Initialization for the optimized implementation of 40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [some character set]* 40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This op scans through all matching input. 40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The following LOOP_C op emulates stack unwinding if the following pattern fails. 40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < sets->size()); 40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop through input, until either the input is exhausted or 40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we reach a character that is not a member of the set. 
40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t ix = fp->fInputIdx; 40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, ix); 40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 40676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix >= fActiveLimit) { 40686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 40696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 40726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<256) { 40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c) == FALSE) { 40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c) == FALSE) { 40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = UTEXT_GETNATIVEINDEX(fInputText); 40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there were no matching characters, skip over the loop altogether. 40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop doesn't run at all, a * op always succeeds. 40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix == fp->fInputIdx) { 40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; // skip the URX_LOOP_C op. 
40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Peek ahead in the compiled pattern, to the URX_LOOP_C that 40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // must follow. It's operand is the stack location 40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that holds the starting input index for the match of this [set]* 40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackLoc = URX_VAL(loopcOp); 40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[stackLoc] = fp->fInputIdx; 40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = ix; 41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save State to the URX_LOOP_C op that follows this one, 41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // so that match failures in the following code will return to there. 41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_DOT_I: 41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Initialization for the optimized implementation of .* 41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This op scans through all remaining input. 41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The following LOOP_C op emulates stack unwinding if the following pattern fails. 41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop through input until the input is exhausted (we reach an end-of-line) 41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In DOTALL mode, we can just go straight to the end of the input. 41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t ix; 41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((opValue & 1) == 1) { 41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Dot-matches-All mode. Jump straight to the end of the string. 41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = fActiveLimit; 41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // NOT DOT ALL mode. Line endings do not match '.' 41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Scan forward until a line ending or end of input. 
41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = fp->fInputIdx; 41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, ix); 41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix >= fActiveLimit) { 41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = UTEXT_NEXT32(fInputText); 41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c == 0x0a) || // 0x0a is newline in both modes. 41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (((opValue & 2) == 0) && // IF not UNIX_LINES mode 41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028 || c==0x2029) { 41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // char is a line ending. Exit the scanning loop. 41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = UTEXT_GETNATIVEINDEX(fInputText); 41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there were no matching characters, skip over the loop altogether. 41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop doesn't run at all, a * op always succeeds. 
41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix == fp->fInputIdx) { 41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; // skip the URX_LOOP_C op. 41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Peek ahead in the compiled pattern, to the URX_LOOP_C that 41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // must follow. It's operand is the stack location 41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that holds the starting input index for the match of this .* 41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackLoc = URX_VAL(loopcOp); 41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[stackLoc] = fp->fInputIdx; 41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = ix; 41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save State to the URX_LOOP_C op that follows this one, 41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // so that match failures in the following code will return to there. 41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_C: 41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue<fFrameSize); 41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org backSearchIndex = fp->fExtra[opValue]; 41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(backSearchIndex <= fp->fInputIdx); 41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (backSearchIndex == fp->fInputIdx) { 41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We've backed up the input idx to the point that the loop started. 41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop is done. Leave here without saving state. 41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Subsequent failures won't come back here. 41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set up for the next iteration of the loop, with input index 41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // backed up by one from the last time through, 41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and a state save to this instruction in case the following code fails again. 
41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (We're going backwards because this loop emulates stack unwinding, not 41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the initial scan forward.) 41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx > 0); 41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 prevC = UTEXT_PREVIOUS32(fInputText); 41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText); 41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (prevC == 0x0a && 41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx > backSearchIndex && 41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org twoPrevC == 0x0d) { 41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // .*, stepping back over CRLF pair. 
41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-1, status); 42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Trouble. The compiled pattern contains an entry with an 42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unrecognized type tag. 
42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = FALSE; 42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbreakFromLoop: 42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = isMatch; 42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isMatch) { 42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = fMatchEnd; 42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchStart = startIdx; 42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = fp->fInputIdx; 42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_RUN_DEBUG_PRINTF(("Match. 
start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); 42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); 42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrame = fp; // The active stack frame when the engine stopped. 42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Contains the capture group results that we need to 42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // access later. 42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// MatchChunkAt This is the actual matching engine. Like MatchAt, but with the 42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// assumption that the entire string is available in the UText's 42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// chunk buffer. 
For now, that means we can use int32_t indexes, 42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// except for anything that needs to be saved (like group starts 42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// and ends). 42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// startIdx: begin matching at this index. 42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// toEnd: if true, match must extend to end of the input region 42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// 42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------------- 42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) { 42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isMatch = FALSE; // True if the we have a match. 42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards 42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t op; // Operation from the compiled pattern, split into 42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t opType; // the opcode 42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t opValue; // and the operand value. 
42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifdef REGEX_RUN_DEBUG 42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) 42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("MatchAt(startIdx=%d)\n", startIdx); 42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Original Pattern: "); 42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = utext_next32From(fPattern->fPattern, 0); 42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (c != U_SENTINEL) { 42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<32 || c>256) { 42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = '.'; 42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fPattern->fPattern); 42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Input String: "); 42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = utext_next32From(fInputText, 0); 42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (c != U_SENTINEL) { 42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<32 || c>256) { 42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = '.'; 42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("%c", c); 42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = UTEXT_NEXT32(fInputText); 42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("\n"); 42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Cache frequently referenced items from the compiled pattern 43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pat = fPattern->fCompiledPat->getBuffer(); 43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *litText = fPattern->fLiteralText.getBuffer(); 43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVector *sets = fPattern->fSets; 43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *inputBuf = fInputText->chunkContents; 43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrameSize = fPattern->fFrameSize; 43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REStackFrame *fp = resetStack(); 43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = 0; 43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
fp->fInputIdx = startIdx; 43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Zero out the pattern's static data 43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i = 0; i<fPattern->fDataSize; i++) { 43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[i] = 0; 43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Main loop for interpreting the compiled pattern. 43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // One iteration of the loop per pattern operation performed. 43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (_heapchk() != _HEAPOK) { 43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "Heap Trouble\n"); 43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; 43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue = URX_VAL(op); 43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifdef REGEX_RUN_DEBUG 43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 
43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx, 43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 43416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fPattern->dumpOp(fp->fPatIdx); 43426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 43446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 43456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (opType) { 43476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_NOP: 43506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKTRACK: 43546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Force a backtrack. In some circumstances, the pattern compiler 43556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will notice that the pattern can't possibly match anything, and will 43566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // emit one of these at that point. 
43576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 43586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_ONECHAR: 43626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fActiveLimit) { 43636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 43646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 43656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == opValue) { 43666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 43706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 43726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STRING: 43766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 43776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input against a literal string. 43786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Strings require two slots in the compiled pattern, one for the 43796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // offset to the string text, and one for the length. 
43806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stringStartIdx = opValue; 43816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stringLen; 43826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 43846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 43856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 43866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stringLen = URX_VAL(op); 43876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opType == URX_STRING_LEN); 43886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stringLen >= 2); 43896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar * pInp = inputBuf + fp->fInputIdx; 43916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar * pInpLimit = inputBuf + fActiveLimit; 43926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar * pPat = litText+stringStartIdx; 43936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar * pEnd = pInp + stringLen; 43946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 43956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (pInp < pEnd) { 43966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (pInp >= pInpLimit) { 43976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 43986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 43996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*pInp++ != *pPat++) { 44026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 
44036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 44086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx += stringLen; 44096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 44106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 44116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STATE_SAVE: 44176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, opValue, status); 44186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_END: 44226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The match loop will exit via this path on a successful match, 44236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // when we reach the end of the pattern. 44246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (toEnd && fp->fInputIdx != fActiveLimit) { 44256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The pattern matched, but not to the end of input. Try some more. 
44266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 44276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = TRUE; 44306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto breakFromLoop; 44316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Start and End Capture stack frame variables are laid out out like this: 44336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // fp->fExtra[opValue] - The start of a completed capture group 44346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // opValue+1 - The end of a completed capture group 44356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // opValue+2 - the start of a capture group whose end 44366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // has not yet been reached (and might not ever be). 44376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_START_CAPTURE: 44386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 44396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+2] = fp->fInputIdx; 44406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_END_CAPTURE: 44446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 44456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 
44466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 44476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // End position 44486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 44496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR: // $, test for End of line 44536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // or for position before new line at end of input 44546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fAnchorLimit-2) { 44556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are no where near the end of input. Fail. 44566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is the common case. Keep it first. 44576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 44586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 44616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 
44626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 44636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 44646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are positioned just before a new-line that is located at the 44686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // end of input, succeed. 44696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == fAnchorLimit-1) { 44706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 44716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c); 44726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) { 44746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 44756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At new-line at end of input. 
Success 44766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 44776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 44786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (fp->fInputIdx == fAnchorLimit-2 && 44826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) { 44836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 44846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 44856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; // At CR/LF at end of input. Success 44866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 44896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 44946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit-1) { 44956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Either at the last character of input, or off the end. 44966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == fAnchorLimit-1) { 44976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At last char of input. Success if it's a new line. 
44986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputBuf[fp->fInputIdx] == 0x0a) { 44996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 45006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 45016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 45046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Off the end of input. Success. 45056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 45066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 45076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at end of input. Back-track out. 45126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_M: // $, test for End of line in multi-line mode 45176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 45186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 45196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 
45206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 45216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 45226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are positioned just before a new-line, succeed. 45256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It makes no difference where the new-line is within the input. 45266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c = inputBuf[fp->fInputIdx]; 45276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { 45286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 45296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In multi-line mode, hitting a new-line just before the end of input does not 45306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // set the hitEnd or requireEnd flags 45316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 45326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // not at a new line. Fail. 
45366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 45426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 45436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fAnchorLimit) { 45446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We really are at the end of input. Success. 45456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 45466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; // Java set requireEnd in this case, even though 45476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; // adding a new-line would not lose the match. 45486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If we are not positioned just before a new-line, the test fails; backtrack out. 45506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It makes no difference where the new-line is within the input. 
45516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputBuf[fp->fInputIdx] != 0x0a) { 45526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET: // ^, test for start of line 45596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fAnchorStart) { 45606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET_M: // ^, test for start of line in mulit-line mode 45666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 45676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == fAnchorStart) { 45686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are at the start input. Success. 
45696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether character just before the current pos is a new-line 45726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unless we are at the end of input 45736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c = inputBuf[fp->fInputIdx - 1]; 45746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((fp->fInputIdx < fAnchorLimit) && 45756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 45766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // It's a new-line. ^ is true. Success. 45776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: what should be done with positions between a CR and LF? 45786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at the start of a line. Fail. 
45816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 45876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 45886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx >= fAnchorStart); 45896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx <= fAnchorStart) { 45906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We are at the start input. Success. 45916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check whether character just before the current pos is a new-line 45946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx <= fAnchorLimit); 45956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c = inputBuf[fp->fInputIdx - 1]; 45966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c != 0x0a) { 45976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Not at the start of a line. Back-track out. 
45986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 45996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_B: // Test for word boundaries 46046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 46056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx); 46066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \B 46076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 46086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 46156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 46166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = isUWordBoundary(fp->fInputIdx); 46176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \B 46186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 46196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
46216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_D: // Test for decimal digit 46266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 46276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 46286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 46296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 46346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 46356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 
46366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 46376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success ^= (UBool)(opValue != 0); // flip sense for \D 46386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 46396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_G: // Test for position at end of previous match 46466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 46476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_X: 46536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Match a Grapheme, as defined by Unicode TR 29. 46546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Differs slightly from Perl, which consumes combining marks independently 46556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of context. 
46566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 46576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fail if at end of input 46596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 46606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 46616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 46636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Examine (and consume) the current char. 46666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Dispatch into a little state machine, based on the char. 46676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 46686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 46696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet **sets = fPattern->fStaticSets; 46706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 46716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 46726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_L]->contains(c)) goto GC_L; 46736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LV]->contains(c)) goto GC_V; 46746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 46756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 46766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) goto GC_T; 
46776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 46786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_L: 46826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 46836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 46846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_L]->contains(c)) goto GC_L; 46856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LV]->contains(c)) goto GC_V; 46866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 46876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 46886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_PREV(inputBuf, 0, fp->fInputIdx, c); 46896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 46906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_V: 46926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 46936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 46946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_V]->contains(c)) goto GC_V; 46956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) goto GC_T; 46966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_PREV(inputBuf, 0, fp->fInputIdx, c); 46976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 46986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
46996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_T: 47006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 47016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_T]->contains(c)) goto GC_T; 47036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_PREV(inputBuf, 0, fp->fInputIdx, c); 47046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Extend; 47056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Extend: 47076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Combining characters are consumed here 47086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 47096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 47106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 47146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, fp->fInputIdx); 47156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto GC_Done; 47196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Control: 47216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Most control chars stand alone (don't combine with combining chars), 
47226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // except for that CR/LF sequence is a single grapheme cluster. 47236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) { 47246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx++; 47256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgGC_Done: 47286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 47296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 47306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKSLASH_Z: // Test for end of Input 47386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fAnchorLimit) { 47396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 47426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fRequireEnd = TRUE; 47436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
47476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STATIC_SETREF: 47496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 47506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input character against one of the predefined sets 47516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Word Characters, for example) 47526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The high bit of the op value is a flag for the match polarity. 47536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 0: success if input char is in set. 47546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 1: success if input char is not in set. 47556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 47566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 47576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 47626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue &= ~URX_NEG_SET; 47636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 47646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 47666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c < 256) { 47686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = 
&fPattern->fStaticSets8[opValue]; 47696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c)) { 47706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = !success; 47716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *s = fPattern->fStaticSets[opValue]; 47746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c)) { 47756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = !success; 47766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!success) { 47796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STAT_SETREF_N: 47866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 47876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test input character for NOT being a member of one of 47886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the predefined sets (Word Characters, for example) 47896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 47906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 47916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
break; 47936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 47966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 47986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c < 256) { 48006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 48016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c) == FALSE) { 48026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 48056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *s = fPattern->fStaticSets[opValue]; 48066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c) == FALSE) { 48076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_SETREF: 48166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) 
{ 48186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 48196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < sets->size()); 48246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Pick up one char and test it for set membership. 48266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 48276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 48286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<256) { 48296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 48306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c)) { 48316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The character is in the set. A Match. 48326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 48356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 48366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c)) { 48376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The character is in the set. A Match. 
48386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the character wasn't in the set. 48436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY: 48496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // . matches anything, but stops at end-of-line. 48516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 48526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 48536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 48546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. 
Advance over one char, unless we've hit end-of-line 48596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 48606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 48616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 48626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 48636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of line in normal mode. . does not match. 48646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY_ALL: 48726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // . in dot-matches-all (including new lines) mode 48746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 48756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 
48766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 48776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 48786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Advance over one char, except if we are 48826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at a cr/lf, advance over both of them. 48836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 48846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 48856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c==0x0d && fp->fInputIdx < fActiveLimit) { 48866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In the case of a CR/LF, we need to advance over both. 48876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputBuf[fp->fInputIdx] == 0x0a) { 48886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit); 48896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 48936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_DOTANY_UNIX: 48966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 48976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // '.' operator, matches all, but stops at end-of-line. 
48986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UNIX_LINES mode, so 0x0a is the only recognized line ending. 48996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx >= fActiveLimit) { 49006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // At end of input. Match failed. Backtrack out. 49016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 49026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 49036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // There is input left. Advance over one char, unless we've hit end-of-line 49076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 49086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 49096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c == 0x0a) { 49106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of line in normal mode. '.' 
does not match the \n 49116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 49126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP: 49186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; 49196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_FAIL: 49226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = FALSE; 49236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto breakFromLoop; 49246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP_SAV: 49266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < fPattern->fCompiledPat->size()); 49276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 49286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; // Then JMP. 49296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMP_SAV_X: 49326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This opcode is used with (x)+, when x can match a zero length string. 49336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Same as JMP_SAV, except conditional on the match having made forward progress. 
49346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 49356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // data address of the input position at the start of the loop. 49366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 49376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 49386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stoOp = (int32_t)pat[opValue-1]; 49396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 49406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t frameLoc = URX_VAL(stoOp); 49416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 49426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc]; 49436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(prevInputIdx <= fp->fInputIdx); 49446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (prevInputIdx < fp->fInputIdx) { 49456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The match did make progress. Repeat the loop. 49466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 49476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; 49486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[frameLoc] = fp->fInputIdx; 49496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the input position did not advance, we do nothing here, 49516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // execution will fall out of the loop. 
49526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_INIT: 49566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 49576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 49586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 49596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pick up the three extra operands that CTR_INIT has, and 49616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the pattern location counter past 49626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 49636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 3; 49646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 49656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 49666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 49676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minCount>=0); 49686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(maxCount>=minCount || maxCount==-1); 49696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(loopLoc>=fp->fPatIdx); 49706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (minCount == 0) { 49726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, loopLoc+1, status); 49736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
49746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 49756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking. 49766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (maxCount == 0) { 49776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 49786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_LOOP: 49836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 49846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 49856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t initOp = (int32_t)pat[opValue]; 49866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 49876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 49886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[opValue+2]; 49896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[opValue+3]; 49906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*pCounter)++; 49916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 49926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(*pCounter == maxCount); 49936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*pCounter >= minCount) { 
49966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 49976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop has no hard upper bound. 49986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check that it is progressing through the input, break if it is not. 49996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 50006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == *pLastInputIdx) { 50016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pLastInputIdx = fp->fInputIdx; 50046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 50076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue + 4; // Loop back. 
50096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_INIT_NG: 50136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 50146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Initialize a non-greedy loop 50156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 50166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 50176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Pick up the three extra operands that CTR_INIT_NG has, and 50196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // skip the pattern location counter past 50206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 50216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 3; 50226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 50236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 50246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 50256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minCount>=0); 50266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(maxCount>=minCount || maxCount==-1); 50276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(loopLoc>fp->fPatIdx); 50286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 50296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for 
loop breaking. 50306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (minCount == 0) { 50336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount != 0) { 50346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 50356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 50376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_CTR_LOOP_NG: 50426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 50436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Non-greedy {min, max} loops 50446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 50456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t initOp = (int32_t)pat[opValue]; 50466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 50476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 50486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minCount = (int32_t)pat[opValue+2]; 50496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxCount = (int32_t)pat[opValue+3]; 50506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*pCounter)++; 50526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((uint64_t)*pCounter >= (uint32_t)maxCount && 
maxCount != -1) { 50536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop has matched the maximum permitted number of times. 50546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Break out of here with no action. Matching will 50556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // continue with the following pattern. 50566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(*pCounter == maxCount); 50576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*pCounter < minCount) { 50616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We haven't met the minimum number of matches yet. 50626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop back for another one. 50636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue + 4; // Loop back. 50646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We do have the minimum number of matches. 50666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there is no upper bound on the loop iterations, check that the input index 50686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // is progressing, and stop the loop if it is not. 
50696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (maxCount == -1) { 50706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 50716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx == *pLastInputIdx) { 50726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pLastInputIdx = fp->fInputIdx; 50756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Continuation: we will fall into the pattern following the loop 50786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (non-greedy, don't execute loop body first), but first do 50796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a state save to the top of the loop, so that a match failure 50806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in the following pattern will try another iteration of the loop. 
50816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, opValue + 4, status); 50826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STO_SP: 50876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 50886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 50896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LD_SP: 50926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 50936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 50946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize = (int32_t)fData[opValue]; 50956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(newStackSize <= fStack->size()); 50966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 50976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newFP == (int64_t *)fp) { 50986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 50996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 51016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<fFrameSize; i++) { 51026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newFP[i] = ((int64_t *)fp)[i]; 51036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
fp = (REStackFrame *)newFP; 51056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 51066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKREF: 51106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 51116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < fFrameSize); 51126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupStartIdx = fp->fExtra[opValue]; 51136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupEndIdx = fp->fExtra[opValue+1]; 51146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupStartIdx <= groupEndIdx); 51156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t inputIndex = fp->fInputIdx; 51166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupStartIdx < 0) { 51176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This capture group has not participated in the match thus far, 51186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 
51196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 51226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int64_t groupIndex = groupStartIdx; groupIndex < groupEndIdx; ++groupIndex,++inputIndex) { 51236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputIndex >= fActiveLimit) { 51246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 51256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 51266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputBuf[groupIndex] != inputBuf[inputIndex]) { 51296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 51306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 51346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = inputIndex; 51356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 51366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 51376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_BACKREF_I: 51426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 51436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue < 
fFrameSize); 51446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupStartIdx = fp->fExtra[opValue]; 51456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t groupEndIdx = fp->fExtra[opValue+1]; 51466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(groupStartIdx <= groupEndIdx); 51476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (groupStartIdx < 0) { 51486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This capture group has not participated in the match thus far, 51496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 51506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUCharIterator captureGroupItr(inputBuf, groupStartIdx, groupEndIdx); 51536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUCharIterator inputItr(inputBuf, fp->fInputIdx, fActiveLimit); 51546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Note: if the capture group match was of an empty string the backref 51566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // match succeeds. Verified by testing: Perl matches succeed 51576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in this case, so we do too. 
51586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 51606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 51616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 captureGroupChar = captureGroupItr.next(); 51626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (captureGroupChar == U_SENTINEL) { 51636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = TRUE; 51646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 inputChar = inputItr.next(); 51676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChar == U_SENTINEL) { 51686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 51696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 51706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputChar != captureGroupChar) { 51736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 51746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success && inputItr.inExpansion()) { 51796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We otained a match by consuming part of a string obtained from 51806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // case-folding a single code point of the input text. 
51816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This does not count as an overall match. 51826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 51836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 51866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = inputItr.getIndex(); 51876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 51886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 51896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STO_INP_LOC: 51946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 51956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue >= 0 && opValue < fFrameSize); 51966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[opValue] = fp->fInputIdx; 51976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_JMPX: 52016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 52026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 52036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx += 1; 52046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 52056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize); 52066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t savedInputIdx = (int32_t)fp->fExtra[dataLoc]; 52076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(savedInputIdx <= fp->fInputIdx); 52086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (savedInputIdx < fp->fInputIdx) { 52096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = opValue; // JMP 52106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 52116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 52126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LA_START: 52176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 52186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Entering a lookahead block. 52196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save Stack Ptr, Input Pos. 52206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 52216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 52226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+1] = fp->fInputIdx; 52236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = fLookStart; // Set the match region change for 52246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fLookLimit; // transparent bounds. 
52256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LA_END: 52296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 52306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Leaving a look-ahead block. 52316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // restore Stack Ptr, Input Pos to positions they had on entry to block. 52326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 52336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackSize = fStack->size(); 52346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize = (int32_t)fData[opValue]; 52356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackSize >= newStackSize); 52366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (stackSize > newStackSize) { 52376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Copy the current top frame back to the new (cut back) top frame. 52386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This makes the capture groups from within the look-ahead 52396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // expression available. 
52406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 52416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 52426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0; i<fFrameSize; i++) { 52436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newFP[i] = ((int64_t *)fp)[i]; 52446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)newFP; 52466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 52476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = fData[opValue+1]; 52496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore the active region bounds in the input string; they may have 52516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // been changed because of transparent bounds on a Region. 
52526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveStart = fRegionStart; 52536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fRegionLimit; 52546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_ONECHAR_I: 52586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx < fActiveLimit) { 52596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 52606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 52616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 52626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 52656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 52666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 52686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_STRING_I: 52716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Case-insensitive test input against a literal string. 52726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Strings require two slots in the compiled pattern, one for the 52736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // offset to the string text, and one for the length. 
52746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The compiled string has already been case folded. 52756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 52766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *patternString = litText + opValue; 52776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org op = (int32_t)pat[fp->fPatIdx]; 52796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 52806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opType = URX_TYPE(op); 52816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org opValue = URX_VAL(op); 52826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opType == URX_STRING_LEN); 52836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternStringLen = opValue; // Length of the string from the pattern. 52846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 52856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cText; 52866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cPattern; 52876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool success = TRUE; 52886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t patternStringIdx = 0; 52896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CaseFoldingUCharIterator inputIterator(inputBuf, fp->fInputIdx, fActiveLimit); 52906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (patternStringIdx < patternStringLen) { 52916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern); 52926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cText = inputIterator.next(); 52936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cText != cPattern) { 52946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 
52956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cText == U_SENTINEL) { 52966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 52976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 52986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 52996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (inputIterator.inExpansion()) { 53026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org success = FALSE; 53036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (success) { 53066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = inputIterator.getIndex(); 53076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 53086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 53096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 53126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_START: 53146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 53156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Entering a look-behind block. 53166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save Stack Ptr, Input Pos. 53176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: implement transparent bounds. 
Ticket #6067 53186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 53196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue] = fStack->size(); 53206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+1] = fp->fInputIdx; 53216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Init the variable containing the start index for attempted matches. 53226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+2] = -1; 53236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save input string length, then reset to pin any matches to end at 53246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the current position. 53256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData[opValue+3] = fActiveLimit; 53266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = fp->fInputIdx; 53276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 53296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_CONT: 53326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 53336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Positive Look-Behind, at top of loop checking for matches of LB expression 53346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at all possible input starting positions. 53356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch the min and max possible match lengths. They are the operands 53376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // of this op in the pattern. 
53386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minML = (int32_t)pat[fp->fPatIdx++]; 53396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 53406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML <= maxML); 53416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML >= 0); 53426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch (from data) the last input index where a match was attempted. 53446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 53456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *lbStartIdx = &fData[opValue+2]; 53466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0) { 53476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // First time through loop. 53486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = fp->fInputIdx - minML; 53496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 53506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2nd through nth time through the loop. 53516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up start position for match by one. 
53526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx == 0) { 53536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*lbStartIdx)--; 53546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 53556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, *lbStartIdx); 53566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 53606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have tried all potential match starting points without 53616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getting a match. Backtrack out, and out of the 53626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look Behind altogether. 53636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 53646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t restoreInputLen = fData[opValue+3]; 53656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen >= fActiveLimit); 53666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen <= fInputLength); 53676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = restoreInputLen; 53686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 53696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 53726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (successful match will fall off the end of the loop.) 
53736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-3, status); 53746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = *lbStartIdx; 53756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 53776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LB_END: 53796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of a look-behind block, after a successful match. 53806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 53816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 53826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fActiveLimit) { 53836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The look-behind expression matched, but the match did not 53846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // extend all the way to the point that we are looking behind from. 53856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL out of here, which will take us back to the LB_CONT, which 53866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will retry the match starting at another position or fail 53876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the look-behind altogether, whichever is appropriate. 53886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 53896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 53906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 53916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 53926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look-behind match is good. 
Restore the original input string length, 53936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // which had been truncated to pin the end of the lookbehind match to the 53946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // position being looked-behind. 53956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t originalInputLen = fData[opValue+3]; 53966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen >= fActiveLimit); 53976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen <= fInputLength); 53986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = originalInputLen; 53996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 54016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LBN_CONT: 54046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 54056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Negative Look-Behind, at top of loop checking for matches of LB expression 54066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // at all possible input starting positions. 54076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch the extra parameters of this op. 
54096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minML = (int32_t)pat[fp->fPatIdx++]; 54106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 54116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 54126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continueLoc = URX_VAL(continueLoc); 54136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML <= maxML); 54146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(minML >= 0); 54156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(continueLoc > fp->fPatIdx); 54166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Fetch (from data) the last input index where a match was attempted. 54186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 54196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t *lbStartIdx = &fData[opValue+2]; 54206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0) { 54216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // First time through loop. 54226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *lbStartIdx = fp->fInputIdx - minML; 54236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 54246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2nd through nth time through the loop. 54256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Back up start position for match by one. 54266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx == 0) { 54276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (*lbStartIdx)--; // Because U16_BACK is unsafe starting at 0. 
54286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 54296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, *lbStartIdx); 54306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 54346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We have tried all potential match starting points without 54356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // getting a match, which means that the negative lookbehind as 54366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a whole has succeeded. Jump forward to the continue location 54376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t restoreInputLen = fData[opValue+3]; 54386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen >= fActiveLimit); 54396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(restoreInputLen <= fInputLength); 54406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = restoreInputLen; 54416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx = continueLoc; 54426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 54436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 54466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (successful match will cause a FAIL out of the loop altogether.) 
54476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-4, status); 54486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = *lbStartIdx; 54496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 54516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LBN_END: 54536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // End of a negative look-behind block, after a successful match. 54546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 54556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 54566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fp->fInputIdx != fActiveLimit) { 54576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The look-behind expression matched, but the match did not 54586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // extend all the way to the point that we are looking behind from. 54596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL out of here, which will take us back to the LB_CONT, which 54606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will retry the match starting at another position or succeed 54616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the look-behind altogether, whichever is appropriate. 
54626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 54636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 54646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Look-behind expression matched, which means look-behind test as 54676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // a whole Fails 54686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore the original input string length, which had been truncated 54706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // in order to pin the end of the lookbehind match 54716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // to the position being looked-behind. 54726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int64_t originalInputLen = fData[opValue+3]; 54736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen >= fActiveLimit); 54746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(originalInputLen <= fInputLength); 54756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fActiveLimit = originalInputLen; 54766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Restore original stack position, discarding any state saved 54786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // by the successful pattern match. 
54796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 54806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newStackSize = (int32_t)fData[opValue]; 54816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fStack->size() > newStackSize); 54826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fStack->setSize(newStackSize); 54836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // FAIL, which will take control back to someplace 54856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // prior to entering the look-behind test. 54866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = (REStackFrame *)fStack->popFrame(fFrameSize); 54876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 54896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_SR_I: 54926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Initialization for the optimized implementation of 54936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // [some character set]* 54946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This op scans through all matching input. 54956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The following LOOP_C op emulates stack unwinding if the following pattern fails. 
54966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 54976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue > 0 && opValue < sets->size()); 54986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 54996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 55006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop through input, until either the input is exhausted or 55026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we reach a character that is not a member of the set. 55036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t ix = (int32_t)fp->fInputIdx; 55046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 55056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix >= fActiveLimit) { 55066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 55076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 55106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, ix, fActiveLimit, c); 55116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c<256) { 55126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s8->contains(c) == FALSE) { 55136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, ix); 55146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 55176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (s->contains(c) == FALSE) { 55186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, 
ix); 55196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there were no matching characters, skip over the loop altogether. 55256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop doesn't run at all, a * op always succeeds. 55266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix == fp->fInputIdx) { 55276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; // skip the URX_LOOP_C op. 55286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Peek ahead in the compiled pattern, to the URX_LOOP_C that 55326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // must follow. 
Its operand is the stack location 55336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that holds the starting input index for the match of this [set]* 55346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 55356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 55366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackLoc = URX_VAL(loopcOp); 55376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 55386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[stackLoc] = fp->fInputIdx; 55396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = ix; 55406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save State to the URX_LOOP_C op that follows this one, 55426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // so that match failures in the following code will return to there. 55436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 55446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 55456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 55466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_DOT_I: 55516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop Initialization for the optimized implementation of .* 55526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This op scans through all remaining input. 
55536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The following LOOP_C op emulates stack unwinding if the following pattern fails. 55546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 55556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Loop through input until the input is exhausted (we reach an end-of-line) 55566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In DOTALL mode, we can just go straight to the end of the input. 55576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t ix; 55586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((opValue & 1) == 1) { 55596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Dot-matches-All mode. Jump straight to the end of the string. 55606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = (int32_t)fActiveLimit; 55616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 55626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 55636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // NOT DOT ALL mode. Line endings do not match '.' 55646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Scan forward until a line ending or end of input. 
55656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ix = (int32_t)fp->fInputIdx; 55666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (;;) { 55676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix >= fActiveLimit) { 55686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fHitEnd = TRUE; 55696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 55726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT(inputBuf, ix, fActiveLimit, c); // c = inputBuf[ix++] 55736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 55746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((c == 0x0a) || // 0x0a is newline in both modes. 55756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (((opValue & 2) == 0) && // IF not UNIX_LINES mode 55766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029))) { 55776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // char is a line ending. Put the input pos back to the 55786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // line ending char, and exit the scanning loop. 55796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, ix); 55806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If there were no matching characters, skip over the loop altogether. 
55876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop doesn't run at all, a * op always succeeds. 55886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (ix == fp->fInputIdx) { 55896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; // skip the URX_LOOP_C op. 55906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Peek ahead in the compiled pattern, to the URX_LOOP_C that 55946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // must follow. It's operand is the stack location 55956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // that holds the starting input index for the match of this .* 55966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 55976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 55986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stackLoc = URX_VAL(loopcOp); 55996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 56006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fExtra[stackLoc] = fp->fInputIdx; 56016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx = ix; 56026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Save State to the URX_LOOP_C op that follows this one, 56046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // so that match failures in the following code will return to there. 56056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
56066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx, status); 56076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fPatIdx++; 56086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 56106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case URX_LOOP_C: 56136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 56146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(opValue>=0 && opValue<fFrameSize); 56156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org backSearchIndex = (int32_t)fp->fExtra[opValue]; 56166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(backSearchIndex <= fp->fInputIdx); 56176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (backSearchIndex == fp->fInputIdx) { 56186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We've backed up the input idx to the point that the loop started. 56196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The loop is done. Leave here without saving state. 56206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Subsequent failures won't come back here. 56216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 56226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set up for the next iteration of the loop, with input index 56246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // backed up by one from the last time through, 56256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and a state save to this instruction in case the following code fails again. 
56266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (We're going backwards because this loop emulates stack unwinding, not 56276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the initial scan forward.) 56286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(fp->fInputIdx > 0); 56296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 prevC; 56306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit? 56316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (prevC == 0x0a && 56336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp->fInputIdx > backSearchIndex && 56346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputBuf[fp->fInputIdx-1] == 0x0d) { 56356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 56366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 56376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // .*, stepping back over CRLF pair. 
56386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_BACK_1(inputBuf, 0, fp->fInputIdx); 56396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fp = StateSave(fp, fp->fPatIdx-1, status); 56446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 56466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 56506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Trouble. The compiled pattern contains an entry with an 56516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unrecognized type tag. 
56526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(FALSE); 56536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 56566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isMatch = FALSE; 56576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 56586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgbreakFromLoop: 56626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatch = isMatch; 56636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isMatch) { 56646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fLastMatchEnd = fMatchEnd; 56656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchStart = startIdx; 56666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fMatchEnd = fp->fInputIdx; 56676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 56686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_RUN_DEBUG_PRINTF(("Match. 
start=%ld end=%ld\n\n", fMatchStart, fMatchEnd)); 56696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org else 56726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 56736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fTraceDebug) { 56746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); 56756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 56776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFrame = fp; // The active stack frame when the engine stopped. 56796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Contains the capture group results that we need to 56806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // access later. 56816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 56836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 56846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) 56876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 56896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 5691