1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************** 31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2002-2015 International Business Machines Corporation * 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and others. All rights reserved. * 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 8c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// file: rematch.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Contains the implementation of class RegexMatcher, 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// which is one of the main API classes for the ICU regular expression package. 
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h" 22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h" 23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "regextxt.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "ucase.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #include <malloc.h> // Needed for heapcheck testing 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
QueruU_NAMESPACE_BEGIN 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Default limit for the size of the back track stack, to avoid system 39c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// failures caused by heap exhaustion. Units are in 32 bit words, not bytes. 40c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This value puts ICU's limits higher than most other regexp implementations, 41c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// which use recursion rather than the heap, and take more storage per 42c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// backtrack point. 43c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 44c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000; 45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 46c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Time limit counter constant. 47c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Time limits for expression evaluation are in terms of quanta of work by 48c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// the engine, each of which is 10,000 state saves. 49c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This constant determines the number of state saves per tick. 50c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t TIMER_INITIAL_VALUE = 10000; 51c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// Test for any of the Unicode line terminating characters. 
541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic inline UBool isLineTerminator(UChar32 c) { 551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (c & ~(0x0a | 0x0b | 0x0c | 0x0d | 0x85 | 0x2028 | 0x2029)) { 561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return false; 571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return (c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029; 591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------- 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructor and Destructor 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------- 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(const RegexPattern *pat) { 67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = U_ZERO_ERROR; 68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(fDeferredStatus); 69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 70c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat==NULL) { 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
fPattern = pat; 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &input, 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status) { 84c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(status); 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UParseError pe; 89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = fPatternOwned; 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&inputText, &input, &status); 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(&inputText, status); 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fInputUniStrMaybeMutable = TRUE; 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText 
*regexp, UText *input, 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, UErrorCode &status) { 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(status); 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = fPatternOwned; 114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init2(input, status); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(const UnicodeString ®exp, 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status) { 120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(status); 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UParseError pe; 125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
return; 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = fPatternOwned; 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, status); 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(UText *regexp, 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, UErrorCode &status) { 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(status); 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = fPatternOwned; 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, status); 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::~RegexMatcher() { 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fStack; 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fData != fSmallData) 
{ 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(fData); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData = NULL; 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fPatternOwned) { 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPatternOwned; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPatternOwned = NULL; 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPattern = NULL; 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInput) { 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputText) { 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fInputText); 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fAltInputText) { 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fAltInputText); 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #if UCONFIG_NO_BREAK_ITERATION==0 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fWordBreakItr; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #endif 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// init() common initialization for use by all constructors. 
181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Initialize all fields, get the object into a consistent state. 182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This must be done even when the initial status shows an error, 183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// so that the object is initialized sufficiently well for the destructor 184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// to run safely. 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexMatcher::init(UErrorCode &status) { 187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = NULL; 188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = NULL; 189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrameSize = 0; 190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRegionStart = 0; 191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRegionLimit = 0; 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchorStart = 0; 193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchorLimit = 0; 194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLookStart = 0; 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLookLimit = 0; 196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = 0; 197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = 0; 198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTransparentBounds = FALSE; 199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchoringBounds = TRUE; 200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatchStart = 0; 202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatchEnd = 0; 
203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLastMatchEnd = -1; 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAppendPosition = 0; 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = FALSE; 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = FALSE; 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStack = NULL; 208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrame = NULL; 209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTimeLimit = 0; 210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTime = 0; 211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTickCounter = 0; 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStackLimit = DEFAULT_BACKTRACK_STACK_CAPACITY; 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCallbackFn = NULL; 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCallbackContext = NULL; 21527f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackFn = NULL; 21627f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackContext = NULL; 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTraceDebug = FALSE; 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fData = fSmallData; 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fWordBreakItr = NULL; 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fStack = NULL; 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = NULL; 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAltInputText = NULL; 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = 0; 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
fInputUniStrMaybeMutable = FALSE; 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// init2() Common initialization for use by RegexMatcher constructors, part 2. 236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This handles the common setup to be done after the Pattern is available. 237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::init2(UText *input, UErrorCode &status) { 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) { 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t)); 246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fData == NULL) { 247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 
250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fStack = new UVector64(status); 253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (fStack == NULL) { 254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 255103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return; 256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reset(input); 259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status); 260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar BACKSLASH = 0x5c; 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar DOLLARSIGN = 0x24; 2691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic const UChar LEFTBRACKET = 0x7b; 2701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic const UChar RIGHTBRACKET = 0x7d; 2711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// appendReplacement 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest, 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &replacement, 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_SUCCESS(status)) { 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &dest, &status); 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(&resultText, &replacementText, status); 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 
appendReplacement, UText mode 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::appendReplacement(UText *dest, 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replacement, 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch == FALSE) { 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_INVALID_STATE; 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy input string from the end of previous match to start of current match 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t destLen = utext_nativeLength(dest); 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatchStart > fAppendPosition) { 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 32027f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fMatchStart-fAppendPosition), &status); 
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(fMatchStart-fAppendPosition); 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus); 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 33027f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 33127f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 33227f654740f2a26ad62a5c155af9199af9e69b889claireho return *this; 33327f654740f2a26ad62a5c155af9199af9e69b889claireho } 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status); 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status); 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(inputChars); 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAppendPosition = fMatchEnd; 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // scan the replacement text, looking for substitutions ($n) and \escapes. 
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: optimize this loop by efficiently scanning for '$' or '\', 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // move entire ranges not containing substitutions. 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(replacement, 0); 3461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (UChar32 c = UTEXT_NEXT32(replacement); U_SUCCESS(status) && c != U_SENTINEL; c = UTEXT_NEXT32(replacement)) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == BACKSLASH) { 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Backslash Escape. Copy the following char out without further checks. 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: Surrogate pairs don't need any special handling 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The second half wont be a '$' or a '\', and 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will move to the dest normally on the next 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // loop iteration. 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_CURRENT32(replacement); 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c==0x55/*U*/ || c==0x75/*u*/) { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have a \udddd or \Udddddddd escape sequence. 
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset = 0; 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement); 36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapedChar != (UChar32)0xFFFFFFFF) { 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(escapedChar)) { 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)escapedChar; 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(escapedChar); 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(escapedChar); 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: Report errors for mal-formed \u escapes? 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // As this is, the original sequence is output, which may be OK. 
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (context.lastOffset == offset) { 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(replacement); 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (context.lastOffset != offset-1) { 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_moveIndex32(replacement, offset - context.lastOffset - 1); 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(replacement); 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain backslash escape. Just put out the escaped character. 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)c; 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(c); 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(c); 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c != DOLLARSIGN) { 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normal char, not a $. Copy it out without further checks. 
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)c; 40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(c); 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(c); 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 4121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // We've got a $. Pick up a capture group name or number if one follows. 4131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Consume digits so long as the resulting group number <= the number of 4141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // number of capture groups in the pattern. 415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = 0; 4171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t numDigits = 0; 4181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 nextChar = utext_current32(replacement); 4191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (nextChar == LEFTBRACKET) { 4201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Scan for a Named Capture Group, ${name}. 
4211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString groupName; 4221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert utext_next32(replacement); 4231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert while(U_SUCCESS(status) && nextChar != RIGHTBRACKET) { 4241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert nextChar = utext_next32(replacement); 4251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (nextChar == U_SENTINEL) { 4261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 4271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else if ((nextChar >= 0x41 && nextChar <= 0x5a) || // A..Z 4281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert (nextChar >= 0x61 && nextChar <= 0x7a) || // a..z 4291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert (nextChar >= 0x31 && nextChar <= 0x39)) { // 0..9 4301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert groupName.append(nextChar); 4311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else if (nextChar == RIGHTBRACKET) { 4321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert groupNum = uhash_geti(fPattern->fNamedCaptureMap, &groupName); 4331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (groupNum == 0) { 4341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 4351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 4361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 4371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Character was something other than a name char or a closing '}' 4381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 4391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 4421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 
else if (u_isdigit(nextChar)) { 4431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // $n Scan for a capture group number 4441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t numCaptureGroups = fPattern->fGroupMap->size(); 4451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for (;;) { 4461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert nextChar = UTEXT_CURRENT32(replacement); 4471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (nextChar == U_SENTINEL) { 4481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 4491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 4501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (u_isdigit(nextChar) == FALSE) { 4511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 4521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 4531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t nextDigitVal = u_charDigitValue(nextChar); 4541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (groupNum*10 + nextDigitVal > numCaptureGroups) { 4551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Don't consume the next digit if it makes the capture group number too big. 
4561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (numDigits == 0) { 4571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_INDEX_OUTOFBOUNDS_ERROR; 4581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 4591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 4601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 4611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert (void)UTEXT_NEXT32(replacement); 4621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert groupNum=groupNum*10 + nextDigitVal; 4631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert ++numDigits; 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 4661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // $ not followed by capture group name or number. 4671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_REGEX_INVALID_CAPTURE_GROUP_NAME; 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_SUCCESS(status)) { 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += appendGroup(groupNum, dest, status); 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } // End of $ capture group handling 4741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } // End of per-character loop through the replacement string. 
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// appendTail Intended to be used in conjunction with appendReplacement() 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To the destination string, append everything following 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the last match position from the input string. 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Note: Match ranges do not affect appendTail or appendReplacement 488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) { 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &dest, &status); 494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 49627f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(&resultText, status); 
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendTail, UText mode 50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 50627f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) { 50727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest; 50927f654740f2a26ad62a5c155af9199af9e69b889claireho } 51027f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 51127f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest; 51327f654740f2a26ad62a5c155af9199af9e69b889claireho } 514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputLength > fAppendPosition) { 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t destLen = utext_nativeLength(dest); 518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 51927f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fInputLength-fAppendPosition), &status); 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
len16 = (int32_t)(fInputLength-fAppendPosition); 52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status); 52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16)); 53027f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 53127f654740f2a26ad62a5c155af9199af9e69b889claireho fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 53227f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated 53427f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t destLen = utext_nativeLength(dest); 53527f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest, destLen, destLen, inputChars, len16, &status); 53627f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(inputChars); 53727f654740f2a26ad62a5c155af9199af9e69b889claireho } 53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// end 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexMatcher::end(UErrorCode &err) const { 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return end(0, err); 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55427f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(UErrorCode &err) const { 55527f654740f2a26ad62a5c155af9199af9e69b889claireho return end64(0, err); 55627f654740f2a26ad62a5c155af9199af9e69b889claireho} 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55827f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch == FALSE) { 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_REGEX_INVALID_STATE; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (group < 0 || group > fPattern->fGroupMap->size()) { 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_INDEX_OUTOFBOUNDS_ERROR; 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t e = -1; 
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (group == 0) { 572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius e = fMatchEnd; 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get the position within the stack frame of the variables for 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this capture group. 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupOffset < fPattern->fFrameSize); 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupOffset >= 0); 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru e = fFrame->fExtra[groupOffset + 1]; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 58227f654740f2a26ad62a5c155af9199af9e69b889claireho return e; 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58527f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const { 58627f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)end64(group, err); 58727f654740f2a26ad62a5c155af9199af9e69b889claireho} 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//-------------------------------------------------------------------------------- 590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// 591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// findProgressInterrupt This function is called once for each advance in the target 592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// string from the find() function, and calls the user progress callback 
593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// function if there is one installed. 594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// 595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// Return: TRUE if the find operation is to be terminated. 596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// FALSE if the find operation is to continue running. 597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// 598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//-------------------------------------------------------------------------------- 599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::findProgressInterrupt(int64_t pos, UErrorCode &status) { 600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (fFindProgressCallbackFn && !(*fFindProgressCallbackFn)(fFindProgressCallbackContext, pos)) { 601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius status = U_REGEX_STOPPED_BY_CALLER; 602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// find() 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexMatcher::find() { 613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(fDeferredStatus)) { 614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 
615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode status = U_ZERO_ERROR; 617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool result = find(status); 618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return result; 619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//-------------------------------------------------------------------------------- 622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// 623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// find() 624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius// 625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//-------------------------------------------------------------------------------- 626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::find(UErrorCode &status) { 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Start at the position of the last match end. (Will be zero if the 62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher has been reset.) 
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius status = fDeferredStatus; 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return findUsingChunk(status); 64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t startPos = fMatchEnd; 643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos==0) { 644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startPos = fActiveStart; 645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the position of any previous successful match. 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLastMatchEnd = fMatchEnd; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatchStart == fMatchEnd) { 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Previous match had zero length. 
Move start position up one position 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to avoid sending find() into a loop on zero-length matches. 654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos >= fActiveLimit) { 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 660b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fLastMatchEnd >= 0) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A previous find() failed to match. Don't try again. 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (without this test, a pattern with a zero-length match 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // could match again at the end of an input string.) 
668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compute the position in the input string beyond which a match can not begin, because 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the minimum length match would extend past the end of the input. 676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Be aware of possible overflows if making changes here. 67850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t testStartLimit; 67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testStartLimit = fActiveLimit - fPattern->fMinMatchLen; 68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > testStartLimit) { 68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 68650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // We don't know exactly how long the minimum match length is in native characters. 688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // Treat anything > 0 as 1. 689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius testStartLimit = fActiveLimit - (fPattern->fMinMatchLen > 0 ? 
1 : 0); 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(startPos >= 0); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (fPattern->fStartType) { 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_NO_INFO: 697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No optimization was found. 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try a match at each input position. 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(startPos, FALSE, status); 701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note that it's 
perfectly OK for a pattern to have a zero-length 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testStartLimit the last time through. 717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 71827f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_START: 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matches are only possible at the start of the input string 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (pattern begins with ^ or \A) 725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos > fActiveStart) { 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(startPos, FALSE, status); 730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fMatch; 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_SET: 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru // Match may start on any char from a pre-computed set. 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fPattern->fMinMatchLen > 0); 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int64_t pos = startPos; 74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius startPos = UTEXT_GETNATIVEINDEX(fInputText); 74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c will be -1 (U_SENTINEL) at end of text, in which case we 74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip this next block (so we don't have a negative array index) 74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and handle end of text in the following block. 74827f654740f2a26ad62a5c155af9199af9e69b889claireho if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) || 74927f654740f2a26ad62a5c155af9199af9e69b889claireho (c>=256 && fPattern->fInitialChars->contains(c)))) { 750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(pos, FALSE, status); 751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 759f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (startPos > testStartLimit) { 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 
761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 764f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 76527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_STRING: 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_CHAR: 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match starts on exactly one char. 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fPattern->fMinMatchLen > 0); 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 theChar = fPattern->fInitialChar; 77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 778f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int64_t pos = startPos; 77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 780f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius startPos = UTEXT_GETNATIVEINDEX(fInputText); 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == theChar) { 782f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(pos, FALSE, status); 783f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 791f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (startPos > testStartLimit) { 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 796f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 79727f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_LINE: 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos == fAnchorStart) { 806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(startPos, FALSE, status); 807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPattern->fFlags & UREGEX_UNIX_LINES) { 82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0x0a) { 825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchAt(startPos, FALSE, status); 826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fMatch) { 830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 
837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note that it's perfectly OK for a pattern to have a zero-length 842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testStartLimit the last time through. 844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 84527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 8491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 8501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 8511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert (void)UTEXT_NEXT32(fInputText); 8521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert startPos = UTEXT_GETNATIVEINDEX(fInputText); 8531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert MatchAt(startPos, FALSE, status); 8551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status)) { 8561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return FALSE; 8571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fMatch) { 8591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 
return TRUE; 8601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 8611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UTEXT_SETNATIVEINDEX(fInputText, startPos); 862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note that it's perfectly OK for a pattern to have a zero-length 871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testStartLimit the last time through. 
873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 87427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88927f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::find(int64_t start, UErrorCode &status) { 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru this->reset(); // Note: Reset() is specified by Java Matcher documentation. 
898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This will reset the region to be the full input length. 89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 90427f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeStart = start; 90527f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fMatchEnd = nativeStart; 910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return find(status); 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// findUsingChunk() -- like find(), but with the advance knowledge that the 91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// entire string is available in the UText's chunk buffer. 
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::findUsingChunk(UErrorCode &status) { 92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start at the position of the last match end. (Will be zero if the 92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher has been reset. 92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t startPos = (int32_t)fMatchEnd; 92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos==0) { 92750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = (int32_t)fActiveStart; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 929fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 93250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save the position of any previous successful match. 93450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = fMatchEnd; 935fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatchStart == fMatchEnd) { 93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Previous match had zero length. Move start position up one position 93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to avoid sending find() into a loop on zero-length matches. 
93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= fActiveLimit) { 94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fInputLength); 94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fLastMatchEnd >= 0) { 94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A previous find() failed to match. Don't try again. 94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (without this test, a pattern with a zero-length match 95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // could match again at the end of an input string.) 95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 955fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 956fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compute the position in the input string beyond which a match can not begin, because 95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the minimum length match would extend past the end of the input. 95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Be aware of possible overflows if making changes here. 961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius // Note: a match can begin at inputBuf + testLen; it is an inclusive limit. 
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t testLen = (int32_t)(fActiveLimit - fPattern->fMinMatchLen); 96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > testLen) { 96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 968fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(startPos >= 0); 971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (fPattern->fStartType) { 97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_NO_INFO: 974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No optimization was found. 97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try a match at each input position. 
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(startPos, FALSE, status); 978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testLen) { 98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 
992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 99327f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_START: 99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matches are only possible at the start of the input string 99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (pattern begins with ^ or \A) 100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > fActiveStart) { 100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(startPos, FALSE, status); 1005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_SET: 101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match may start on any char from a pre-computed set. 
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fPattern->fMinMatchLen > 0); 101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pos = startPos; 101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 101827f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c<256 && fPattern->fInitialChars8->contains(c)) || 101927f654740f2a26ad62a5c155af9199af9e69b889claireho (c>=256 && fPattern->fInitialChars->contains(c))) { 1020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(pos, FALSE, status); 1021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (startPos > testLen) { 102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 103427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 1038fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_STRING: 104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 
START_CHAR: 104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match starts on exactly one char. 104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fPattern->fMinMatchLen > 0); 104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 theChar = fPattern->fInitialChar; 104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pos = startPos; 104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == theChar) { 1049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(pos, FALSE, status); 1050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (startPos > testLen) { 105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 106327f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius U_ASSERT(FALSE); 1067fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_LINE: 106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos == fAnchorStart) { 1072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(startPos, FALSE, status); 1073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1081fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fFlags & UREGEX_UNIX_LINES) { 108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = inputBuf[startPos-1]; 108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a) { 1086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(startPos, FALSE, status); 1087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testLen) { 109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 
109650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 110050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 1103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 110427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 110550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 110650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 110750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 110850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = inputBuf[startPos-1]; 11091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { 111150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos++; 111250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MatchChunkAt(startPos, FALSE, status); 1114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 111650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 111750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 111850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 111950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 112050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 112150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(startPos >= testLen) { 112250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 112350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 112450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 112550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 112650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 112750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 112850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 112950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 1130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (findProgressInterrupt(startPos, status)) 113127f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 113250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 113350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 113650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho default: 113750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 114050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 114150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
114850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// group() 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 115150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(UErrorCode &status) const { 115250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return group(0, status); 115350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 115450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 115527f654740f2a26ad62a5c155af9199af9e69b889claireho// Return immutable shallow clone 115627f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const { 115727f654740f2a26ad62a5c155af9199af9e69b889claireho return group(0, dest, group_len, status); 115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 115950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 116027f654740f2a26ad62a5c155af9199af9e69b889claireho// Return immutable shallow clone 116127f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const { 116227f654740f2a26ad62a5c155af9199af9e69b889claireho group_len = 0; 116327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 116427f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 116527f654740f2a26ad62a5c155af9199af9e69b889claireho } 116627f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 116727f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 1168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (fMatch == FALSE) { 116927f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_REGEX_INVALID_STATE; 1170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 
117127f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 117227f654740f2a26ad62a5c155af9199af9e69b889claireho } 1173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { 1175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest; 117627f654740f2a26ad62a5c155af9199af9e69b889claireho } 1177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 117827f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t s, e; 117927f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupNum == 0) { 118027f654740f2a26ad62a5c155af9199af9e69b889claireho s = fMatchStart; 118127f654740f2a26ad62a5c155af9199af9e69b889claireho e = fMatchEnd; 118227f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 118327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 118427f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 118527f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(groupOffset >= 0); 118627f654740f2a26ad62a5c155af9199af9e69b889claireho s = fFrame->fExtra[groupOffset]; 118727f654740f2a26ad62a5c155af9199af9e69b889claireho e = fFrame->fExtra[groupOffset+1]; 118827f654740f2a26ad62a5c155af9199af9e69b889claireho } 118950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 119027f654740f2a26ad62a5c155af9199af9e69b889claireho if (s < 0) { 119127f654740f2a26ad62a5c155af9199af9e69b889claireho // A capture group wasn't part of the match 119227f654740f2a26ad62a5c155af9199af9e69b889claireho return utext_clone(dest, fInputText, FALSE, TRUE, &status); 119327f654740f2a26ad62a5c155af9199af9e69b889claireho } 119427f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(s <= e); 119527f654740f2a26ad62a5c155af9199af9e69b889claireho group_len = e - s; 1196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 119727f654740f2a26ad62a5c155af9199af9e69b889claireho dest = utext_clone(dest, fInputText, 
FALSE, TRUE, &status); 119827f654740f2a26ad62a5c155af9199af9e69b889claireho if (dest) 119927f654740f2a26ad62a5c155af9199af9e69b889claireho UTEXT_SETNATIVEINDEX(dest, s); 120027f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 120127f654740f2a26ad62a5c155af9199af9e69b889claireho} 120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 120350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { 120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 12051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int64_t groupStart = start64(groupNum, status); 12061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int64_t groupEnd = end64(groupNum, status); 12071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(status) || groupStart == -1 || groupStart == groupEnd) { 120827f654740f2a26ad62a5c155af9199af9e69b889claireho return result; 120927f654740f2a26ad62a5c155af9199af9e69b889claireho } 1210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Get the group length using a utext_extract preflight. 12121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // UText is actually pretty efficient at this when underlying encoding is UTF-16. 
12131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t length = utext_extract(fInputText, groupStart, groupEnd, NULL, 0, &status); 12141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (status != U_BUFFER_OVERFLOW_ERROR) { 12151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return result; 121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_ZERO_ERROR; 12191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar *buf = result.getBuffer(length); 12201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (buf == NULL) { 12211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert status = U_MEMORY_ALLOCATION_ERROR; 122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 12231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t extractLength = utext_extract(fInputText, groupStart, groupEnd, buf, length, &status); 12241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert result.releaseBuffer(extractLength); 12251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U_ASSERT(length == extractLength); 122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return result; 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 12301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendGroup() -- currently internal only, appends a group to a UText rather 123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// than replacing its contents 123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 
123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 123850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const { 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 124027f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 124427f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 124627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t destLen = utext_nativeLength(dest); 1247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch == FALSE) { 124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_STATE; 125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t s, e; 125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum == 0) { 125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
s = fMatchStart; 126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fMatchEnd; 126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset >= 0); 126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fFrame->fExtra[groupOffset]; 126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fFrame->fExtra[groupOffset+1]; 126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s < 0) { 1270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // A capture group wasn't part of the match 127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(s <= e); 1274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t deltaLen; 127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(e <= fInputLength); 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status); 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(e-s); 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); 128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 128827f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupChars == NULL) { 128927f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 129027f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 129127f654740f2a26ad62a5c155af9199af9e69b889claireho } 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, s, e, groupChars, len16+1, &status); 1293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status); 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(groupChars); 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return deltaLen; 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// groupCount() 1305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t 
RegexMatcher::groupCount() const { 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fPattern->fGroupMap->size(); 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasAnchoringBounds() 1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasAnchoringBounds() const { 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fAnchoringBounds; 1318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasTransparentBounds() 1324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasTransparentBounds() const { 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fTransparentBounds; 1328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hitEnd() 1335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hitEnd() const { 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fHitEnd; 1339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// input() 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UnicodeString &RegexMatcher::input() const { 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!fInput) { 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)fInputLength; 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status); 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // overflow, length status 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *result = new UnicodeString(len16, 0, 0); 1358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = result->getBuffer(len16); 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result->releaseBuffer(len16); 1362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator= 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fInput; 136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// inputText() 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::inputText() const { 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fInputText; 137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getInput() -- like inputText(), but makes a clone or copies into another UText 138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 138427f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const { 138527f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 138627f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 138727f654740f2a26ad62a5c155af9199af9e69b889claireho } 138827f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 138927f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 1390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return dest; 139127f654740f2a26ad62a5c155af9199af9e69b889claireho } 1392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status); 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t input16Len; 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho input16Len = (int32_t)fInputLength; 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len)); 140527f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 140627f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 140727f654740f2a26ad62a5c155af9199af9e69b889claireho } 1408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status); 1413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(inputChars); 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_clone(NULL, fInputText, FALSE, TRUE, &status); 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut); 142450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut) { 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool 
retVal = FALSE; 1426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In the following test, we're really only interested in whether the UText should switch 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // between heap and stack allocation. If length hasn't changed, we won't, so the chunkContents 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will still point to the correct data. 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (utext_nativeLength(ut) != ut->nativeIndexingLimit) { 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *us=(UnicodeString *)ut->context; 1432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Update to the latest length. 143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit). 143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newLength = us->length(); 1436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Update the chunk description. 143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The buffer may have switched between stack- and heap-based. 
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkContents = us->getBuffer(); 144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkLength = newLength; 144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkNativeLimit = newLength; 144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->nativeIndexingLimit = newLength; 144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = TRUE; 144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// lookingAt() 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::lookingAt(UErrorCode &status) { 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)fActiveStart, FALSE, status); 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(fActiveStart, FALSE, status); 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) { 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 1490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeStart; 150427f654740f2a26ad62a5c155af9199af9e69b889claireho nativeStart = start; 150527f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 151050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)nativeStart, FALSE, status); 151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(nativeStart, FALSE, status); 151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// matches() 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 152550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::matches(UErrorCode &status) { 152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
1543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)fActiveStart, TRUE, status); 154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(fActiveStart, TRUE, status); 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155327f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::matches(int64_t start, UErrorCode &status) { 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 1562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(compat_SyncMutableUTextContents(fInputText)) { 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeStart; 157627f654740f2a26ad62a5c155af9199af9e69b889claireho nativeStart = start; 157727f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)nativeStart, TRUE, status); 158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(nativeStart, TRUE, status); 158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pattern 
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 159750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst RegexPattern &RegexMatcher::pattern() const { 159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fPattern; 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// region 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 160827f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) { 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 161327f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionStart>regionLimit || regionStart<0 || regionLimit<0) { 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 161727f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeStart = regionStart; 
161827f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeLimit = regionLimit; 161927f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart > fInputLength || nativeLimit > fInputLength) { 162027f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ILLEGAL_ARGUMENT_ERROR; 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 162327f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex == -1) 162427f654740f2a26ad62a5c155af9199af9e69b889claireho this->reset(); 162527f654740f2a26ad62a5c155af9199af9e69b889claireho else 1626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius resetPreserveRegion(); 1627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionStart = nativeStart; 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionLimit = nativeLimit; 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = nativeStart; 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = nativeLimit; 163227f654740f2a26ad62a5c155af9199af9e69b889claireho 163327f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex != -1) { 163427f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex < fActiveStart || startIndex > fActiveLimit) { 163527f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 163627f654740f2a26ad62a5c155af9199af9e69b889claireho } 1637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fMatchEnd = startIndex; 163827f654740f2a26ad62a5c155af9199af9e69b889claireho } 163927f654740f2a26ad62a5c155af9199af9e69b889claireho 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!fTransparentBounds) { 164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = nativeStart; 164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = nativeLimit; 164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(fAnchoringBounds) { 164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = nativeStart; 164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = nativeLimit; 164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165127f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) { 165227f654740f2a26ad62a5c155af9199af9e69b889claireho return region(start, limit, -1, status); 165327f654740f2a26ad62a5c155af9199af9e69b889claireho} 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regionEnd 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 166050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionEnd() const { 166127f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)fRegionLimit; 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 166427f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionEnd64() const { 166527f654740f2a26ad62a5c155af9199af9e69b889claireho return fRegionLimit; 166627f654740f2a26ad62a5c155af9199af9e69b889claireho} 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru//-------------------------------------------------------------------------------- 1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regionStart 1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 167350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionStart() const { 167427f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)fRegionStart; 167527f654740f2a26ad62a5c155af9199af9e69b889claireho} 167627f654740f2a26ad62a5c155af9199af9e69b889claireho 167727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionStart64() const { 167827f654740f2a26ad62a5c155af9199af9e69b889claireho return fRegionStart; 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 168450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceAll 1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 168750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &status) { 168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 
169127f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 169227f654740f2a26ad62a5c155af9199af9e69b889claireho return resultString; 169327f654740f2a26ad62a5c155af9199af9e69b889claireho } 1694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &resultString, &status); 1697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho replaceAll(&replacementText, &resultText, status); 1699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 1702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return resultString; 1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceAll, UText mode 1709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 171050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &status) { 1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 171227f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus; 
171627f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 171950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest == NULL) { 172050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString emptyString; 172150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 1722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 172350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&empty, &emptyString, &status); 172450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 172550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 172750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 172850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 172950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 173050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (find()) { 173150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(dest, replacement, status); 173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 173350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 173450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 173550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 173627f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(dest, status); 173750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 173950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru//-------------------------------------------------------------------------------- 1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 174550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceFirst 1746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 174850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) { 174950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 175050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 175150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 1752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &resultString, &status); 1755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho replaceFirst(&replacementText, &resultText, status); 1757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 175950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 1760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return resultString; 1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 176550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceFirst, UText mode 1766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 
176750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) { 1768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 176927f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus; 177327f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 177550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reset(); 177750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!find()) { 177827f654740f2a26ad62a5c155af9199af9e69b889claireho return getInput(dest, status); 177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest == NULL) { 178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString emptyString; 178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 1784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&empty, &emptyString, &status); 178650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 178750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 1788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(dest, replacement, status); 179127f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(dest, status); 
1792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 179350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 179950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// requireEnd 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 180250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::requireEnd() const { 180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fRequireEnd; 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 180950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// reset 1810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 181250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset() { 181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionStart = 0; 181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionLimit = fInputLength; 181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = 0; 
181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fInputLength; 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = 0; 181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = fInputLength; 181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = 0; 182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = fInputLength; 182150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 182250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 182450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 182550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 182750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::resetPreserveRegion() { 182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchStart = 0; 182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchEnd = 0; 183050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = -1; 183150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAppendPosition = 0; 183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = FALSE; 183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTime = 0; 183650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter = TIMER_INITIAL_VALUE; 183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //resetStack(); // more expensive than it looks... 
183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 184150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(const UnicodeString &input) { 184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus); 184327f654740f2a26ad62a5c155af9199af9e69b889claireho if (fPattern->fNeedsAltInput) { 184427f654740f2a26ad62a5c155af9199af9e69b889claireho fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 184527f654740f2a26ad62a5c155af9199af9e69b889claireho } 18461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 18471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 18481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 1850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 185250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 185350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 185450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 185550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Do the following for any UnicodeString. 185650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This is for compatibility for those clients who modify the input string "live" during regex operations. 
1857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fInputUniStrMaybeMutable = TRUE; 1858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 185950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr != NULL) { 186050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 186250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(fInputText, status); 186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 186550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 186650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 186850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 186950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(UText *input) { 187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputText != input) { 187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus); 187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 18731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(fDeferredStatus)) { 18741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return *this; 18751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 1877fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 1880fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr != NULL) { 
188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(input, status); 188550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputUniStrMaybeMutable = FALSE; 189050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*RegexMatcher &RegexMatcher::reset(const UChar *) { 189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDeferredStatus = U_INTERNAL_PROGRAM_ERROR; 189650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 189750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}*/ 189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189927f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) { 190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 190250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 190350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); // Reset also resets the region to be the entire string. 
1904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 190527f654740f2a26ad62a5c155af9199af9e69b889claireho if (position < 0 || position > fActiveLimit) { 190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 190927f654740f2a26ad62a5c155af9199af9e69b889claireho fMatchEnd = position; 191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 191327f654740f2a26ad62a5c155af9199af9e69b889claireho 19142e615e9896b12236afe0ff2695e8afc2ee73f961claireho//-------------------------------------------------------------------------------- 19152e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 19162e615e9896b12236afe0ff2695e8afc2ee73f961claireho// refresh 19172e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 19182e615e9896b12236afe0ff2695e8afc2ee73f961claireho//-------------------------------------------------------------------------------- 19192e615e9896b12236afe0ff2695e8afc2ee73f961clairehoRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) { 19202e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19212e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19222e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19232e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (input == NULL) { 19242e615e9896b12236afe0ff2695e8afc2ee73f961claireho status = U_ILLEGAL_ARGUMENT_ERROR; 19252e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19262e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19272e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (utext_nativeLength(fInputText) != utext_nativeLength(input)) { 19282e615e9896b12236afe0ff2695e8afc2ee73f961claireho status = U_ILLEGAL_ARGUMENT_ERROR; 
19292e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19302e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19312e615e9896b12236afe0ff2695e8afc2ee73f961claireho int64_t pos = utext_getNativeIndex(fInputText); 19322e615e9896b12236afe0ff2695e8afc2ee73f961claireho // Shallow read-only clone of the new UText into the existing input UText 19332e615e9896b12236afe0ff2695e8afc2ee73f961claireho fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status); 19342e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19352e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19362e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19372e615e9896b12236afe0ff2695e8afc2ee73f961claireho utext_setNativeIndex(fInputText, pos); 1938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 19392e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (fAltInputText != NULL) { 19402e615e9896b12236afe0ff2695e8afc2ee73f961claireho pos = utext_getNativeIndex(fAltInputText); 19412e615e9896b12236afe0ff2695e8afc2ee73f961claireho fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status); 19422e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19432e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19442e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19452e615e9896b12236afe0ff2695e8afc2ee73f961claireho utext_setNativeIndex(fAltInputText, pos); 19462e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19472e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19482e615e9896b12236afe0ff2695e8afc2ee73f961claireho} 1949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setTrace 1955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 195750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTrace(UBool state) { 195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTraceDebug = state; 1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 19631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert/** 19641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * UText, replace entire contents of the destination UText with a substring of the source UText. 19651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * 19661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * @param src The source UText 19671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * @param dest The destination UText. Must be writable. 19681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * May be NULL, in which case a new UText will be allocated. 19691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * @param start Start index of source substring. 19701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * @param limit Limit index of source substring. 19711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert * @param status An error code. 
19721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert */ 19731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int64_t limit, UErrorCode *status) { 19741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(*status)) { 19751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return dest; 19761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (start == limit) { 19781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (dest) { 19791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, status); 19801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return dest; 19811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 19821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return utext_openUChars(NULL, NULL, 0, status); 19831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t length = utext_extract(src, start, limit, NULL, 0, status); 19861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { 19871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return dest; 19881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19891b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *status = U_ZERO_ERROR; 19901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert MaybeStackArray<UChar, 40> buffer; 19911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (length >= buffer.getCapacity()) { 19921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar *newBuf = buffer.resize(length+1); // Leave space for terminating Nul. 
19931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (newBuf == NULL) { 19941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *status = U_MEMORY_ALLOCATION_ERROR; 19951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 19971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert utext_extract(src, start, limit, buffer.getAlias(), length+1, status); 19981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (dest) { 19991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert utext_replace(dest, 0, utext_nativeLength(dest), buffer.getAlias(), length, status); 20001b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return dest; 20011b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 20021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 20031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Caller did not provide a prexisting UText. 20041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Open a new one, and have it adopt the text buffer storage. 
20051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(*status)) { 20061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return NULL; 20071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 20081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t ownedLength = 0; 20091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar *ownedBuf = buffer.orphanOrClone(length+1, ownedLength); 20101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (ownedBuf == NULL) { 20111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert *status = U_MEMORY_ALLOCATION_ERROR; 20121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return NULL; 20131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 20141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UText *result = utext_openUChars(NULL, ownedBuf, length, status); 20151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (U_FAILURE(*status)) { 20161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert uprv_free(ownedBuf); 20171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return NULL; 20181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 20191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert result->providerProperties |= (1 << UTEXT_PROVIDER_OWNS_TEXT); 20201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert return result; 20211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert} 20221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 20231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 202850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 
202950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::split(const UnicodeString &input, 203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest[], 203150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 203450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 203550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&inputText, &input, &status); 203627f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 203727f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 203827f654740f2a26ad62a5c155af9199af9e69b889claireho } 203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 204027f654740f2a26ad62a5c155af9199af9e69b889claireho UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity); 204127f654740f2a26ad62a5c155af9199af9e69b889claireho if (destText == NULL) { 204227f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 204327f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 204427f654740f2a26ad62a5c155af9199af9e69b889claireho } 204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 204650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i < destCapacity; i++) { 204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destText[i] = utext_openUnicodeString(NULL, &dest[i], &status); 204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2049fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 205050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fieldCount = split(&inputText, destText, destCapacity, status); 2051fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i < destCapacity; i++) { 205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(destText[i]); 
205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205627f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(destText); 205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fieldCount; 205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split, UText mode 206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 206450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::split(UText *input, 206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 207050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check arguements for validity 207150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 207650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (destCapacity < 1) { 207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset for the input text 208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(input); 208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nextOutputStringStart = 0; 208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fActiveLimit == 0) { 208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through the input text, searching for the delimiter pattern 209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 209350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numCaptureGroups = fPattern->fGroupMap->size(); 209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>=destCapacity-1) { 209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is one or zero output string left. 209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fill the last output string with whatever is left from the input, then exit the loop. 209950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ( i will be == destCapacity if we filled the output array while processing 210050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // capture groups of the delimiter expression, in which case we will discard the 210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // last capture group saved in favor of the unprocessed remainder of the 210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // input string.) 
210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i = destCapacity-1; 210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fActiveLimit > nextOutputStringStart) { 210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 2107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 2108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius input->chunkContents+nextOutputStringStart, 210927f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fActiveLimit-nextOutputStringStart), &status); 211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 2112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 211327f654740f2a26ad62a5c155af9199af9e69b889claireho fActiveLimit-nextOutputStringStart, &status); 211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 211750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 2119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t remaining16Length = 212027f654740f2a26ad62a5c155af9199af9e69b889claireho utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 212227f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 212327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 
212427f654740f2a26ad62a5c155af9199af9e69b889claireho break; 212527f654740f2a26ad62a5c155af9199af9e69b889claireho } 212627f654740f2a26ad62a5c155af9199af9e69b889claireho 212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (find()) { 214350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We found another delimiter. Move everything from where we started looking 214450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // up until the start of the delimiter into the next output string. 
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 2147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 2148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius input->chunkContents+nextOutputStringStart, 214927f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fMatchStart-nextOutputStringStart), &status); 215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 2152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 215327f654740f2a26ad62a5c155af9199af9e69b889claireho fMatchStart-nextOutputStringStart, &status); 215450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus); 216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 216127f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 216227f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 216327f654740f2a26ad62a5c155af9199af9e69b889claireho break; 216427f654740f2a26ad62a5c155af9199af9e69b889claireho } 216550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, nextOutputStringStart, fMatchStart, 
remainingChars, remaining16Length+1, &status); 216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 217750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOutputStringStart = fMatchEnd; 217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If the delimiter pattern has capturing parentheses, the captured 218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // text goes out into the next n destination strings. 218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum; 218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { 2183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= destCapacity-2) { 2184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Never fill the last available output string with capture group text. 2185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // It will filled with the last field, the remainder of the 2186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // unsplit input text. 
218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 218850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i++; 21901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert dest[i] = utext_extract_replace(fInputText, dest[i], 21911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert start64(groupNum, status), end64(groupNum, status), &status); 219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (nextOutputStringStart == fActiveLimit) { 2195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The delimiter was at the end of the string. We're done, but first 2196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // we output one last empty string, for the empty field following 2197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the delimiter at the end of input. 2198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i+1 < destCapacity) { 2199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 2200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (dest[i] == NULL) { 2201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dest[i] = utext_openUChars(NULL, NULL, 0, &status); 2202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 2203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UChar emptyString[] = {(UChar)0}; 2204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status); 2205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 2208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 2209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else 
221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We ran off the end of the input while looking for the next delimiter. 221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // All the remaining text goes into the current output string. 221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 2217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 2218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius input->chunkContents+nextOutputStringStart, 221927f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fActiveLimit-nextOutputStringStart), &status); 222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 2222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 222327f654740f2a26ad62a5c155af9199af9e69b889claireho fActiveLimit-nextOutputStringStart, &status); 222450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 223127f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 
223227f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 223327f654740f2a26ad62a5c155af9199af9e69b889claireho break; 223427f654740f2a26ad62a5c155af9199af9e69b889claireho } 2235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 225027f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 225127f654740f2a26ad62a5c155af9199af9e69b889claireho break; 225227f654740f2a26ad62a5c155af9199af9e69b889claireho } 225327f654740f2a26ad62a5c155af9199af9e69b889claireho } // end of for loop 225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return i+1; 225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// start 226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 226350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::start(UErrorCode &status) const { 226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return start(0, status); 226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 226727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(UErrorCode &status) const { 226827f654740f2a26ad62a5c155af9199af9e69b889claireho return start64(0, status); 226927f654740f2a26ad62a5c155af9199af9e69b889claireho} 227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// start(int32_t group, UErrorCode &status) 227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 227627f654740f2a26ad62a5c155af9199af9e69b889claireho 227727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const { 227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch == FALSE) { 228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_STATE; 228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (group < 0 || group > fPattern->fGroupMap->size()) { 229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t s; 229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (group == 0) { 2295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s = fMatchStart; 229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset >= 0); 230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fFrame->fExtra[groupOffset]; 230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 230327f654740f2a26ad62a5c155af9199af9e69b889claireho return s; 230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230727f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const { 230827f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)start64(group, status); 
230927f654740f2a26ad62a5c155af9199af9e69b889claireho} 231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// useAnchoringBounds 231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 231650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) { 231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchoringBounds = b; 231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = (fAnchoringBounds ? fRegionStart : 0); 231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = (fAnchoringBounds ? fRegionLimit : fInputLength); 232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// useTransparentBounds 232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 232950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) { 233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTransparentBounds = b; 233150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = (fTransparentBounds ? 
0 : fRegionStart); 233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = (fTransparentBounds ? fInputLength : fRegionLimit); 233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setTimeLimit 233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 234150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) { 234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit < 0) { 235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 235250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTimeLimit = limit; 235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 235950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getTimeLimit 236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 236250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getTimeLimit() const { 236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fTimeLimit; 236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setStackLimit 237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 237250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) { 237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit < 0) { 238150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
2384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset the matcher. This is needed here in case there is a current match 2386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // whose final stack frame (containing the match results, pointed to by fFrame) 238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // would be lost by resizing to a smaller stack size. 238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 2389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit == 0) { 239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unlimited stack expansion 239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setMaxCapacity(0); 239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Change the units of the limit from bytes to ints, and bump the size up 2395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // to be big enough to hold at least one stack frame for the pattern, 239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // if it isn't there already. 
239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t adjustedLimit = limit / sizeof(int32_t); 239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (adjustedLimit < fPattern->fFrameSize) { 239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho adjustedLimit = fPattern->fFrameSize; 240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 240150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setMaxCapacity(adjustedLimit); 240250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStackLimit = limit; 240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getStackLimit 241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 241250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getStackLimit() const { 241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fStackLimit; 241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 241950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setMatchCallback 242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
242250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setMatchCallback(URegexMatchCallback *callback, 242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const void *context, 242450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 242550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 242850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCallbackFn = callback; 242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCallbackContext = context; 243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 243150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getMatchCallback 243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 243850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::getMatchCallback(URegexMatchCallback *&callback, 243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const void *&context, 244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 244150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 244250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callback = fCallbackFn; 244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho context = fCallbackContext; 244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 244750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
244850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 244927f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 245027f654740f2a26ad62a5c155af9199af9e69b889claireho// 245127f654740f2a26ad62a5c155af9199af9e69b889claireho// setMatchCallback 245227f654740f2a26ad62a5c155af9199af9e69b889claireho// 245327f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 245427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback *callback, 245527f654740f2a26ad62a5c155af9199af9e69b889claireho const void *context, 245627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status) { 245727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 245827f654740f2a26ad62a5c155af9199af9e69b889claireho return; 245927f654740f2a26ad62a5c155af9199af9e69b889claireho } 246027f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackFn = callback; 246127f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackContext = context; 246227f654740f2a26ad62a5c155af9199af9e69b889claireho} 246327f654740f2a26ad62a5c155af9199af9e69b889claireho 246427f654740f2a26ad62a5c155af9199af9e69b889claireho 246527f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 246627f654740f2a26ad62a5c155af9199af9e69b889claireho// 246727f654740f2a26ad62a5c155af9199af9e69b889claireho// getMatchCallback 246827f654740f2a26ad62a5c155af9199af9e69b889claireho// 246927f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 247027f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback *&callback, 247127f654740f2a26ad62a5c155af9199af9e69b889claireho const void *&context, 
247227f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status) { 247327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 247427f654740f2a26ad62a5c155af9199af9e69b889claireho return; 247527f654740f2a26ad62a5c155af9199af9e69b889claireho } 247627f654740f2a26ad62a5c155af9199af9e69b889claireho callback = fFindProgressCallbackFn; 247727f654740f2a26ad62a5c155af9199af9e69b889claireho context = fFindProgressCallbackContext; 247827f654740f2a26ad62a5c155af9199af9e69b889claireho} 247927f654740f2a26ad62a5c155af9199af9e69b889claireho 248027f654740f2a26ad62a5c155af9199af9e69b889claireho 248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================ 248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Code following this point in this file is the internal 248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Match Engine Implementation. 248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 248650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================ 248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// resetStack 249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Discard any previous contents of the state save stack, and initialize a 2493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// new stack frame to all -1. The -1s are needed for capture group limits, 249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// where they indicate that a group has not yet matched anything. 
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 249650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoREStackFrame *RegexMatcher::resetStack() { 249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Discard any previous contents of the state save stack, and initialize a 249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // new stack frame with all -1 data. The -1s are needed for capture group limits, 249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // where they indicate that a group has not yet matched anything. 250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->removeAllElements(); 250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus); 250350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) { 250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iFrame->fExtra[i] = -1; 250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iFrame; 250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 2514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// isWordBoundary 251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in perl, "xab..cd..", \b is true at positions 0,3,5,7 251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// For us, 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// If the current 
char is a combining mark, 251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// \b is FALSE. 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Else Scan backwards to the first non-combining char. 252050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// We are at a boundary if the this char and the original chars are 252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// opposite in membership in \w set 252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// parameters: pos - the current position in the input buffer 252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// TODO: double-check edge cases at region boundaries. 252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 252850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isWordBoundary(int64_t pos) { 252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isBoundary = FALSE; 253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool cIsWord = FALSE; 2531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 253350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Determine whether char c at current position is a member of the word set of chars. 253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we're off the end of the string, behave as though we're not at a word char. 
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_CURRENT32(fInputText); 253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Current char is a combining one. Not a boundary. 254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up until we come to a non-combining char, determine whether 254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that char is a word char. 254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool prevCIsWord = FALSE; 254950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) { 255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevChar = UTEXT_PREVIOUS32(fInputText); 255450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho || u_charType(prevChar) == U_FORMAT_CHAR)) { 255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isBoundary = cIsWord ^ prevCIsWord; 256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isBoundary; 256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 256450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isChunkWordBoundary(int32_t pos) { 256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isBoundary = FALSE; 256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool cIsWord = FALSE; 2567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 2569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Determine whether char c at current position is a member of the word set of chars. 257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we're off the end of the string, behave as though we're not at a word char. 257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 257650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_GET(inputBuf, fLookStart, pos, fLookLimit, c); 257750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Current char is a combining one. Not a boundary. 
257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up until we come to a non-combining char, determine whether 258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that char is a word char. 258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool prevCIsWord = FALSE; 258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos <= fLookStart) { 258950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevChar; 259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, fLookStart, pos, prevChar); 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho || u_charType(prevChar) == U_FORMAT_CHAR)) { 259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 259650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isBoundary = cIsWord ^ prevCIsWord; 260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isBoundary; 260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 2605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// isUWordBoundary 260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Test for a word boundary using RBBI word break. 260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// parameters: pos - the current position in the input buffer 261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 261250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isUWordBoundary(int64_t pos) { 261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool returnVal = FALSE; 261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 2615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we haven't yet created a break iterator for this matcher, do it now. 
261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr == NULL) { 2618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fWordBreakItr = 261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus); 262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(fInputText, fDeferredStatus); 262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = TRUE; // With Unicode word rules, only positions within the interior of "real" 262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // words are not boundaries. All non-word chars stand by themselves, 263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // with word boundaries on both sides. 263150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!UTEXT_USES_U16(fInputText)) { 263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // !!!: Would like a better way to do this! 
263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pos = utext_extract(fInputText, 0, pos, NULL, 0, &status); 263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = fWordBreakItr->isBoundary((int32_t)pos); 263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 263950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 264050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return returnVal; 264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// IncrementTime This function is called once each TIMER_INITIAL_VALUE state 264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// saves. Increment the "time" counter, and call the 264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// user callback function if there is one installed. 264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// If the match operation needs to be aborted, either for a time-out 265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// or because the user callback asked for it, just set an error status. 265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The engine will pick that up and stop in its outer loop. 
265250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 265450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::IncrementTime(UErrorCode &status) { 265550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter = TIMER_INITIAL_VALUE; 265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTime++; 265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fCallbackFn != NULL) { 265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) { 265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_STOPPED_BY_CALLER; 266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTimeLimit > 0 && fTime >= fTimeLimit) { 266450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_TIME_OUT; 266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 267050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// StateSave 267150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Make a new stack frame, initialized as a copy of the current stack frame. 267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set the pattern index in the original stack frame from the operand value 267350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in the opcode. 
Execution of the engine continues with the state in 267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// the newly created stack frame 267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 267650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note that reserveBlock() may grow the stack, resulting in the 267750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// whole thing being relocated in memory. 267850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 267950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Parameters: 2680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// fp The top frame pointer when called. At return, a new 268150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fame will be present 268250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// savePatIdx An index into the compiled pattern. Goes into the original 268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (not new) frame. If execution ever back-tracks out of the 268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// new frame, this will be where we continue from in the pattern. 268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Return 268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The new frame pointer. 268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 268950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) { 2690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // push storage for a new frame. 269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->reserveBlock(fFrameSize, status); 269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == NULL) { 269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Failure on attempted stack expansion. 
269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Stack function set some other error code, change it to a more 269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // specific one for regular expressions. 269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_STACK_OVERFLOW; 269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We need to return a writable stack frame, so just return the 269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // previous frame. The match operation will stop quickly 269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // because of the error status, after which the frame will never 270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // be looked at again. 270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fp; 270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)(newFP - fFrameSize); // in case of realloc of stack. 2704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // New stack frame = copy of old top frame. 
270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *source = (int64_t *)fp; 270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *dest = newFP; 270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *dest++ = *source++; 271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (source == newFP) { 271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter--; 271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTickCounter <= 0) { 271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IncrementTime(status); // Re-initializes fTickCounter 271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = savePatIdx; 272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (REStackFrame *)newFP; 272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 272450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 272550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 272650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// MatchAt This is the actual matching engine. 272750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 272850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// startIdx: begin matching a this index. 
272950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// toEnd: if true, match must extend to end of the input region 273050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 273150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 273250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { 273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isMatch = FALSE; // True if the we have a match. 2734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards 273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t op; // Operation from the compiled pattern, split into 273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t opType; // the opcode 273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t opValue; // and the operand value. 
2740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 2741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG 274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) 274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("MatchAt(startIdx=%ld)\n", startIdx); 274550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("Original Pattern: "); 274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = utext_next32From(fPattern->fPattern, 0); 274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 2752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fPattern->fPattern); 275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("Input String: "); 275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = utext_next32From(fInputText, 0); 275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("%c", c); 2763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 
276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Cache frequently referenced items from the compiled pattern 277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pat = fPattern->fCompiledPat->getBuffer(); 277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *litText = fPattern->fLiteralText.getBuffer(); 278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector *sets = fPattern->fSets; 278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fFrameSize = fPattern->fFrameSize; 278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *fp = resetStack(); 278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = 0; 278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = startIdx; 278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Zero out the pattern's static data 278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i<fPattern->fDataSize; i++) { 279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[i] = 0; 279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Main loop for interpreting the 
compiled pattern. 279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // One iteration of the loop per pattern operation performed. 279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue = URX_VAL(op); 2802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG 280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) { 280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 280559d709d503bab6e2b61931737e662dd293b40578ccornelius printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx, 280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern->dumpOp(fp->fPatIdx); 280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 2811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (opType) { 281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_NOP: 281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKTRACK: 282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Force a backtrack. 
In some circumstances, the pattern compiler 282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will notice that the pattern can't possibly match anything, and will 282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // emit one of these at that point. 282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_ONECHAR: 282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fActiveLimit) { 282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == opValue) { 283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STRING: 284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test input against a literal string. 
284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Strings require two slots in the compiled pattern, one for the 284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // offset to the string text, and one for the length. 284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2848103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t stringStartIdx = opValue; 284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 2852103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t stringLen = URX_VAL(op); 285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opType == URX_STRING_LEN); 285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stringLen >= 2); 2855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 2856103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UChar *patternString = litText+stringStartIdx; 2857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t patternStringIndex = 0; 285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 2859103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 inputChar; 2860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 patternChar; 286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = TRUE; 2862103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while (patternStringIndex < stringLen) { 2863103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) { 286450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = FALSE; 2865103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 2866103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 2867103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 
2868103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius inputChar = UTEXT_NEXT32(fInputText); 2869103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U16_NEXT(patternString, patternStringIndex, stringLen, patternChar); 2870103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (patternChar != inputChar) { 2871103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 2872103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 288350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STATE_SAVE: 288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, opValue, status); 288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_END: 289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The match loop will exit via this path on a successful match, 289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // when we reach the end of the pattern. 
289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (toEnd && fp->fInputIdx != fActiveLimit) { 289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The pattern matched, but not to the end of input. Try some more. 289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = TRUE; 289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto breakFromLoop; 290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start and End Capture stack frame variables are laid out out like this: 290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // fp->fExtra[opValue] - The start of a completed capture group 290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // opValue+1 - The end of a completed capture group 290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // opValue+2 - the start of a capture group whose end 290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // has not yet been reached (and might not ever be). 290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_START_CAPTURE: 290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue+2] = fp->fInputIdx; 290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_END_CAPTURE: 291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 
291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue+1] = fp->fInputIdx; // End position 291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR: // $, test for End of line 292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or for position before new line at end of input 292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2930fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 2932fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line that is located at the 293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end of input, succeed. 
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 29371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If not in the middle of a CR/LF sequence 29391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) { 294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At new-line at end of input. Success 294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 2943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 294650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 nextC = UTEXT_NEXT32(fInputText); 294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // At CR/LF at end of input. 
Success 295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Off the end of input. Success. 296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Either at the last character of input, or off the end. 297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) { 297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at end of input. Back-track out. 
297950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_M: // $, test for End of line in multi-line mode 298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line, succeed. 299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 
299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_CURRENT32(fInputText); 29951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In multi-line mode, hitting a new-line just before the end of input does not 299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // set the hitEnd or requireEnd flags 299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) { 300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // not at a new line. Fail. 300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; // Java set requireEnd in this case, even though 301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // adding a new-line would not lose the match. 301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are not positioned just before a new-line, the test fails; backtrack out. 301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_CURRENT32(fInputText) != 0x0a) { 302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET: // ^, test for start of line 302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fAnchorStart) { 302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M: // ^, test for start of line in mulit-line mode 303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx == fAnchorStart) { 
303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unless we are at the end of input 304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3043fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = UTEXT_PREVIOUS32(fInputText); 30441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if ((fp->fInputIdx < fAnchorLimit) && isLineTerminator(c)) { 304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It's a new-line. ^ is true. Success. 304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: what should be done with positions between a CR and LF? 304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Fail. 305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx >= fAnchorStart); 305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx <= fAnchorStart) { 305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 
306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 306250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx <= fAnchorLimit); 306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_PREVIOUS32(fInputText); 306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != 0x0a) { 306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Back-track out. 306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_B: // Test for word boundaries 307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isWordBoundary(fp->fInputIdx); 307654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \B 307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 308050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 
308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isUWordBoundary(fp->fInputIdx); 308754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \B 308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_D: // Test for decimal digit 309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 310854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \D 310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_G: // Test for position at end of previous match 311927f654740f2a26ad62a5c155af9199af9e69b889claireho if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 31251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_H: // Test for \h, horizontal white space. 
31261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 31271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 31281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 31291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 31331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c = UTEXT_NEXT32(fInputText); 31341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int8_t ctype = u_charType(c); 31351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UBool success = (ctype == U_SPACE_SEPARATOR || c == 9); // SPACE_SEPARATOR || TAB 31361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert success ^= (UBool)(opValue != 0); // flip sense for \H 31371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (success) { 31381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 31401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 31451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 31461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_R: // Test for \R, any line break sequence. 
31471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 31481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 31491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 31501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 31541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c = UTEXT_NEXT32(fInputText); 31551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 31561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (c == 0x0d && utext_current32(fInputText) == 0x0a) { 31571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert utext_next32(fInputText); 31581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 31611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 31661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 31671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_V: // \v, any single line ending character. 
31681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 31691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 31701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 31711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 31751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c = UTEXT_NEXT32(fInputText); 31761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UBool success = isLineTerminator(c); 31771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert success ^= (UBool)(opValue != 0); // flip sense for \V 31781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (success) { 31791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 31801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 31811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 31821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 31841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 31851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 31861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 3187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case URX_BACKSLASH_X: 318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match a Grapheme, as defined by Unicode TR 29. 318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Differs slightly from Perl, which consumes combining marks independently 319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of context. 
319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fail if at end of input 319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Examine (and consume) the current char. 320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dispatch into a little state machine, based on the char. 320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet **sets = fPattern->fStaticSets; 320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(sets[URX_GC_T]->contains(c)) goto GC_T; 321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_L: 322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 3227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 323150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_V: 323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 323450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 3237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 
323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 324150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_T: 324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 3246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 325050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Extend: 325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Combining characters are consumed here 325250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 325350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 325450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 325550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 325650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_CURRENT32(fInputText); 325750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 325850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 325950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 326150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 326250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
326350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Done; 326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 326550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Control: 3266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Most control chars stand alone (don't combine with combining chars), 326750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // except for that CR/LF sequence is a single grapheme cluster. 326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 327050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 327250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 327350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Done: 327450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 327650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_Z: // Test for end of Input 328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fAnchorLimit) { 328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 
328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STATIC_SETREF: 329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test input character against one of the predefined sets 329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Word Characters, for example) 329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The high bit of the op value is a flag for the match polarity. 329950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 0: success if input char is in set. 330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 1: success if input char is not in set. 330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue &= ~URX_NEG_SET; 330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 
331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 
332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 3332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STAT_SETREF_N: 333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 3336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Test input character for NOT being a member of one of 333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the predefined sets (Word Characters, for example) 333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 
335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) == FALSE) { 335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 3366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_SETREF: 336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Pick up one char and test it for set membership. 
337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 339950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY: 340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // . matches anything, but stops at end-of-line. 
340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, unless we've hit end-of-line 341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 34141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of line in normal mode. . does not match. 
341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY_ALL: 342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ., in dot-matches-all (including new lines) mode 342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, except if we are 343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at a cr/lf, advance over both of them. 
3438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c; 343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c==0x0d && fp->fInputIdx < fActiveLimit) { 344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In the case of a CR/LF, we need to advance over both. 344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 nextc = UTEXT_CURRENT32(fInputText); 344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (nextc == 0x0a) { 3445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY_UNIX: 345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // '.' operator, matches all, but stops at end-of-line. 345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UNIX_LINES mode, so 0x0a is the only recognized line ending. 345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 
345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, unless we've hit end-of-line 346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a) { 346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of line in normal mode. '.' does not match the \n 347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP: 347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; 348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_FAIL: 348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = FALSE; 348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto breakFromLoop; 348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP_SAV: 348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue < fPattern->fCompiledPat->size()); 348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; // Then JMP. 349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP_SAV_X: 349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This opcode is used with (x)+, when x can match a zero length string. 349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Same as JMP_SAV, except conditional on the match having made forward progress. 349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 349650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // data address of the input position at the start of the loop. 
349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stoOp = (int32_t)pat[opValue-1]; 350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 350150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t frameLoc = URX_VAL(stoOp); 350250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 350350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t prevInputIdx = fp->fExtra[frameLoc]; 350450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(prevInputIdx <= fp->fInputIdx); 350550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (prevInputIdx < fp->fInputIdx) { 350650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The match did make progress. Repeat the loop. 350750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 350850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; 350950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[frameLoc] = fp->fInputIdx; 3510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 351150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If the input position did not advance, we do nothing here, 351250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // execution will fall out of the loop. 
351350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_INIT: 351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 351850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 351959d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 352050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pick up the three extra operands that CTR_INIT has, and 3522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // skip the pattern location counter past 352350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 352450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 3; 352550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 352750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minCount>=0); 352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(maxCount>=minCount || maxCount==-1); 353059d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(loopLoc>=fp->fPatIdx); 353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 353250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (minCount == 0) { 353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, loopLoc+1, status); 353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353559d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 353659d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking. 
353759d709d503bab6e2b61931737e662dd293b40578ccornelius } else if (maxCount == 0) { 353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_LOOP: 354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*pCounter)++; 355259d709d503bab6e2b61931737e662dd293b40578ccornelius if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 355359d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(*pCounter == maxCount); 355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*pCounter >= minCount) { 355759d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 355859d709d503bab6e2b61931737e662dd293b40578ccornelius // Loop has no hard upper bound. 355959d709d503bab6e2b61931737e662dd293b40578ccornelius // Check that it is progressing through the input, break if it is not. 
356059d709d503bab6e2b61931737e662dd293b40578ccornelius int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 356159d709d503bab6e2b61931737e662dd293b40578ccornelius if (fp->fInputIdx == *pLastInputIdx) { 356259d709d503bab6e2b61931737e662dd293b40578ccornelius break; 356359d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 356459d709d503bab6e2b61931737e662dd293b40578ccornelius *pLastInputIdx = fp->fInputIdx; 356559d709d503bab6e2b61931737e662dd293b40578ccornelius } 356659d709d503bab6e2b61931737e662dd293b40578ccornelius } 356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue + 4; // Loop back. 357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_INIT_NG: 357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Initialize a non-greedy loop 357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 357759d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 357959d709d503bab6e2b61931737e662dd293b40578ccornelius // Pick up the three extra operands that CTR_INIT_NG has, and 3580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // skip the pattern location counter past 358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 3; 358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = 
(int32_t)pat[instrOperandLoc+1]; 358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minCount>=0); 358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(maxCount>=minCount || maxCount==-1); 358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(loopLoc>fp->fPatIdx); 358959d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 359059d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for loop breaking. 359159d709d503bab6e2b61931737e662dd293b40578ccornelius } 359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (minCount == 0) { 359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (maxCount != 0) { 359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 3598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_LOOP_NG: 360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Non-greedy {min, max} loops 360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 
360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361259d709d503bab6e2b61931737e662dd293b40578ccornelius (*pCounter)++; 361359d709d503bab6e2b61931737e662dd293b40578ccornelius if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop has matched the maximum permitted number of times. 361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Break out of here with no action. Matching will 361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // continue with the following pattern. 361759d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(*pCounter == maxCount); 361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*pCounter < minCount) { 362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We haven't met the minimum number of matches yet. 362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop back for another one. 362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue + 4; // Loop back. 362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We do have the minimum number of matches. 362759d709d503bab6e2b61931737e662dd293b40578ccornelius 362859d709d503bab6e2b61931737e662dd293b40578ccornelius // If there is no upper bound on the loop iterations, check that the input index 362959d709d503bab6e2b61931737e662dd293b40578ccornelius // is progressing, and stop the loop if it is not. 
363059d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 363159d709d503bab6e2b61931737e662dd293b40578ccornelius int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 363259d709d503bab6e2b61931737e662dd293b40578ccornelius if (fp->fInputIdx == *pLastInputIdx) { 363359d709d503bab6e2b61931737e662dd293b40578ccornelius break; 363459d709d503bab6e2b61931737e662dd293b40578ccornelius } 363559d709d503bab6e2b61931737e662dd293b40578ccornelius *pLastInputIdx = fp->fInputIdx; 363659d709d503bab6e2b61931737e662dd293b40578ccornelius } 363759d709d503bab6e2b61931737e662dd293b40578ccornelius 363859d709d503bab6e2b61931737e662dd293b40578ccornelius // Loop Continuation: we will fall into the pattern following the loop 363959d709d503bab6e2b61931737e662dd293b40578ccornelius // (non-greedy, don't execute loop body first), but first do 364059d709d503bab6e2b61931737e662dd293b40578ccornelius // a state save to the top of the loop, so that a match failure 364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in the following pattern will try another iteration of the loop. 
364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, opValue + 4, status); 364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STO_SP: 364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LD_SP: 365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(newStackSize <= fStack->size()); 365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == (int64_t *)fp) { 365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fFrameSize; i++) { 366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)newFP; 366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKREF: 367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue < fFrameSize); 367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupStartIdx = fp->fExtra[opValue]; 367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupEndIdx = fp->fExtra[opValue+1]; 367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupStartIdx <= groupEndIdx); 367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupStartIdx < 0) { 367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This capture group has not participated in the match thus far, 367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx); 368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3683103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3684103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Note: if the capture group match was of an empty string the backref 3685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // match succeeds. Verified by testing: Perl matches succeed 3686103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // in this case, so we do too. 
3687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 3688103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 3689103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (;;) { 3690103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (utext_getNativeIndex(fAltInputText) >= groupEndIdx) { 3691103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = TRUE; 3692103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3693103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3694103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (utext_getNativeIndex(fInputText) >= fActiveLimit) { 3695103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 3697103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3698103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3699103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 captureGroupChar = utext_next32(fAltInputText); 3700103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 inputChar = utext_next32(fInputText); 3701103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputChar != captureGroupChar) { 3702103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3703103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3705103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3706103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3707103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 3708103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 3709103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 3710103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); 3711103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 
3712103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3713103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3714103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3715103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3716103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3717103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case URX_BACKREF_I: 3718103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius { 3719103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(opValue < fFrameSize); 3720103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int64_t groupStartIdx = fp->fExtra[opValue]; 3721103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int64_t groupEndIdx = fp->fExtra[opValue+1]; 3722103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(groupStartIdx <= groupEndIdx); 3723103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (groupStartIdx < 0) { 3724103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // This capture group has not participated in the match thus far, 372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 
3726103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3728103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius utext_setNativeIndex(fAltInputText, groupStartIdx); 3729103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius utext_setNativeIndex(fInputText, fp->fInputIdx); 3730103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUTextIterator captureGroupItr(*fAltInputText); 3731103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUTextIterator inputItr(*fInputText); 3732103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3733103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Note: if the capture group match was of an empty string the backref 3734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // match succeeds. Verified by testing: Perl matches succeed 3735103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // in this case, so we do too. 3736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 3737103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 3738103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (;;) { 3739103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!captureGroupItr.inExpansion() && utext_getNativeIndex(fAltInputText) >= groupEndIdx) { 3740103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = TRUE; 3741103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3742103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3743103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!inputItr.inExpansion() && utext_getNativeIndex(fInputText) >= fActiveLimit) { 3744103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3745103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 3746103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3747103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 
3748103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 captureGroupChar = captureGroupItr.next(); 3749103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 inputChar = inputItr.next(); 3750103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputChar != captureGroupChar) { 3751103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3752103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 3753103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3754103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3755103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3756103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success && inputItr.inExpansion()) { 3757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We otained a match by consuming part of a string obtained from 3758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // case-folding a single code point of the input text. 3759103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // This does not count as an overall match. 
3760103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3761103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3762103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 3763103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 3764103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 3765103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 3766103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); 3767103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 3771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STO_INP_LOC: 377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize); 377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = fp->fInputIdx; 377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMPX: 378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 1; 378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize); 378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t savedInputIdx = fp->fExtra[dataLoc]; 378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
U_ASSERT(savedInputIdx <= fp->fInputIdx); 378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (savedInputIdx < fp->fInputIdx) { 378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; // JMP 378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LA_START: 379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Entering a lookahead block. 379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save Stack Ptr, Input Pos. 379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+1] = fp->fInputIdx; 380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = fLookStart; // Set the match region change for 380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fLookLimit; // transparent bounds. 380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LA_END: 380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Leaving a look-ahead block. 381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // restore Stack Ptr, Input Pos to positions they had on entry to block. 
381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackSize = fStack->size(); 381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize =(int32_t)fData[opValue]; 381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackSize >= newStackSize); 381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (stackSize > newStackSize) { 381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy the current top frame back to the new (cut back) top frame. 381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This makes the capture groups from within the look-ahead 381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // expression available. 381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fFrameSize; i++) { 382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)newFP; 382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = fData[opValue+1]; 382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Restore the active region bounds in the input string; they may have 383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // been changed because of transparent bounds on a Region. 
383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = fRegionStart; 383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fRegionLimit; 383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_ONECHAR_I: 3837103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Case insensitive one char. The char from the pattern is already case folded. 3838103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Input text is not, but case folding the input can not reduce two or more code 3839103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // points to one. 384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fActiveLimit) { 384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STRING_I: 
385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 3857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Case-insensitive test input against a literal string. 385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Strings require two slots in the compiled pattern, one for the 385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // offset to the string text, and one for the length. 3860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // The compiled string has already been case folded. 386127f654740f2a26ad62a5c155af9199af9e69b889claireho { 3862103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UChar *patternString = litText + opValue; 3863103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t patternStringIdx = 0; 386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue = URX_VAL(op); 386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opType == URX_STRING_LEN); 3870103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t patternStringLen = opValue; // Length of the string from the pattern. 
3871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 3872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 3873103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 cPattern; 3874103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 cText; 3875103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 3876103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 3878103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUTextIterator inputIterator(*fInputText); 3879103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while (patternStringIdx < patternStringLen) { 3880103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (!inputIterator.inExpansion() && UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) { 3881103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3882103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 3883103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3885103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern); 3886103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius cText = inputIterator.next(); 3887103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (cText != cPattern) { 3888103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3889103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3892103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputIterator.inExpansion()) { 3893103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 3894103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 3895103e9ffba2cba345d0078eb8b8db33249f81840aCraig 
Cornelius 3896103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 3897103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 3898103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_START: 390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Entering a look-behind block. 390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save Stack Ptr, Input Pos. 390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: implement transparent bounds. Ticket #6067 391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+1] = fp->fInputIdx; 391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Init the variable containing the start index for attempted matches. 391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+2] = -1; 391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save input string length, then reset to pin any matches to end at 391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the current position. 
391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+3] = fActiveLimit; 391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fp->fInputIdx; 391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_CONT: 392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Positive Look-Behind, at top of loop checking for matches of LB expression 392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at all possible input starting positions. 392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch the min and max possible match lengths. They are the operands 392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of this op in the pattern. 393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML <= maxML); 393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML >= 0); 393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch (from data) the last input index where a match was attempted. 393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0) { 393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // First time through loop. 
394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = fp->fInputIdx - minML; 394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2nd through nth time through the loop. 394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up start position for match by one. 394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx == 0) { 394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 3948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We have tried all potential match starting points without 395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getting a match. Backtrack out, and out of the 395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look Behind altogether. 
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen >= fActiveLimit); 396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = restoreInputLen; 396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (successful match will fall off the end of the loop.) 396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-3, status); 396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = *lbStartIdx; 396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_END: 397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of a look-behind block, after a successful match. 397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fActiveLimit) { 397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The look-behind expression matched, but the match did not 397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // extend all the way to the point that we are looking behind from. 
397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FAIL out of here, which will take us back to the LB_CONT, which 398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will retry the match starting at another position or fail 398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the look-behind altogether, whichever is appropriate. 398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-behind match is good. Restore the orignal input string length, 3987fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // which had been truncated to pin the end of the lookbehind match to the 398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // position being looked-behind. 398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen >= fActiveLimit); 399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = originalInputLen; 399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LBN_CONT: 399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Negative Look-Behind, at top of loop checking for matches of LB expression 400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at all possible input starting positions. 
400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch the extra parameters of this op. 400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continueLoc = URX_VAL(continueLoc); 400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML <= maxML); 400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML >= 0); 400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(continueLoc > fp->fPatIdx); 401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch (from data) the last input index where a match was attempted. 401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0) { 401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // First time through loop. 401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = fp->fInputIdx - minML; 401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2nd through nth time through the loop. 401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up start position for match by one. 
402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx == 0) { 402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 4024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We have tried all potential match starting points without 403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getting a match, which means that the negative lookbehind as 403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a whole has succeeded. Jump forward to the continue location 403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen >= fActiveLimit); 403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = restoreInputLen; 403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = continueLoc; 403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 
404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (successful match will cause a FAIL out of the loop altogether.) 404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-4, status); 404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = *lbStartIdx; 404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LBN_END: 404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of a negative look-behind block, after a successful match. 405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fActiveLimit) { 405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The look-behind expression matched, but the match did not 405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // extend all the way to the point that we are looking behind from. 405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FAIL out of here, which will take us back to the LB_CONT, which 405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will retry the match starting at another position or succeed 405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the look-behind altogether, whichever is appropriate. 
405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-behind expression matched, which means look-behind test as 406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a whole Fails 4064fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4065fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Restore the orignal input string length, which had been truncated 4066fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // inorder to pin the end of the lookbehind match 406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to the position being looked-behind. 406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen >= fActiveLimit); 407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = originalInputLen; 407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Restore original stack position, discarding any state saved 407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // by the successful pattern match. 
407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fStack->size() > newStackSize); 407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 4079fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4080fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // FAIL, which will take control back to someplace 408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // prior to entering the look-behind test. 408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_SR_I: 408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop Initialization for the optimized implementation of 408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // [some character set]* 409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This op scans through all matching input. 409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following LOOP_C op emulates stack unwinding if the following pattern fails. 
409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through input, until either the input is exhausted or 409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // we reach a character that is not a member of the set. 409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t ix = fp->fInputIdx; 410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, ix); 410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix >= fActiveLimit) { 410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) == FALSE) { 411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = UTEXT_GETNATIVEINDEX(fInputText); 411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If there were no matching characters, skip over the loop altogether. 412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop doesn't run at all, a * op always succeeds. 412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix == fp->fInputIdx) { 412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // skip the URX_LOOP_C op. 412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Peek ahead in the compiled pattern, to the URX_LOOP_C that 412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // must follow. It's operand is the stack location 412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that holds the starting input index for the match of this [set]* 412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackLoc = URX_VAL(loopcOp); 413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[stackLoc] = fp->fInputIdx; 413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = ix; 413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save State to the URX_LOOP_C op that follows this one, 413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // so that match failures in the following code will return to there. 413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_DOT_I: 414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop Initialization for the optimized implementation of .* 414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This op scans through all remaining input. 414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following LOOP_C op emulates stack unwinding if the following pattern fails. 414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through input until the input is exhausted (we reach an end-of-line) 415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In DOTALL mode, we can just go straight to the end of the input. 415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t ix; 415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((opValue & 1) == 1) { 415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dot-matches-All mode. Jump straight to the end of the string. 415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = fActiveLimit; 415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // NOT DOT ALL mode. Line endings do not match '.' 415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Scan forward until a line ending or end of input. 
416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = fp->fInputIdx; 416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, ix); 416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix >= fActiveLimit) { 416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 416827f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 416927f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c == 0x0a) || // 0x0a is newline in both modes. 417027f654740f2a26ad62a5c155af9199af9e69b889claireho (((opValue & 2) == 0) && // IF not UNIX_LINES mode 41711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert isLineTerminator(c))) { 417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // char is a line ending. Exit the scanning loop. 417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = UTEXT_GETNATIVEINDEX(fInputText); 417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If there were no matching characters, skip over the loop altogether. 418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop doesn't run at all, a * op always succeeds. 418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix == fp->fInputIdx) { 418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // skip the URX_LOOP_C op. 
418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Peek ahead in the compiled pattern, to the URX_LOOP_C that 418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // must follow. It's operand is the stack location 418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that holds the starting input index for the match of this .* 419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackLoc = URX_VAL(loopcOp); 419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[stackLoc] = fp->fInputIdx; 419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = ix; 419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save State to the URX_LOOP_C op that follows this one, 419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // so that match failures in the following code will return to there. 419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_C: 420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue<fFrameSize); 420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fExtra[opValue]; 421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(backSearchIndex <= fp->fInputIdx); 421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (backSearchIndex == fp->fInputIdx) { 421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We've backed up the input idx to the point that the loop started. 4213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The loop is done. Leave here without saving state. 421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Subsequent failures won't come back here. 421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set up for the next iteration of the loop, with input index 421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // backed up by one from the last time through, 421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and a state save to this instruction in case the following code fails again. 422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (We're going backwards because this loop emulates stack unwinding, not 422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the initial scan forward.) 
422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx > 0); 422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevC = UTEXT_PREVIOUS32(fInputText); 422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 4226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText); 4228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (prevC == 0x0a && 422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx > backSearchIndex && 423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho twoPrevC == 0x0d) { 423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // .*, stepping back over CRLF pair. 423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-1, status); 424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho default: 424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Trouble. 
The compiled pattern contains an entry with an 424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unrecognized type tag. 424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 425250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = FALSE; 4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 425750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehobreakFromLoop: 425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = isMatch; 425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch) { 426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = fMatchEnd; 426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchStart = startIdx; 426250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchEnd = fp->fInputIdx; 4263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG 4266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fTraceDebug) { 4267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (isMatch) { 4268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("Match. 
start=%ld end=%ld\n\n", fMatchStart, fMatchEnd); 4269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 4270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("No match\n\n"); 4271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 4274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fFrame = fp; // The active stack frame when the engine stopped. 427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Contains the capture group results that we need to 427750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // access later. 427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// MatchChunkAt This is the actual matching engine. Like MatchAt, but with the 428550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// assumption that the entire string is available in the UText's 428650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// chunk buffer. For now, that means we can use int32_t indexes, 428750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// except for anything that needs to be saved (like group starts 428850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// and ends). 4289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// startIdx: begin matching at this index.
4291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// toEnd: if true, match must extend to end of the input region 4292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 429450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) { 4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isMatch = FALSE; // True if the we have a match. 4296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards 4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t op; // Operation from the compiled pattern, split into 4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t opType; // the opcode 4301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t opValue; // and the operand value. 
4302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG 4304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fTraceDebug) { 430559d709d503bab6e2b61931737e662dd293b40578ccornelius printf("MatchAt(startIdx=%d)\n", startIdx); 4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Original Pattern: "); 430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = utext_next32From(fPattern->fPattern, 0); 430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("%c", c); 4313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 431450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fPattern->fPattern); 4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Input String: "); 431850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = utext_next32From(fInputText, 0); 431950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c<32 || c>256) { 4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = '.'; 4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%c", c); 4324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 432550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 
4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 433050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 4331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cache frequently referenced items from the compiled pattern 4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 433850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pat = fPattern->fCompiledPat->getBuffer(); 4339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *litText = fPattern->fLiteralText.getBuffer(); 4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *sets = fPattern->fSets; 4342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 434350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 4344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrameSize = fPattern->fFrameSize; 4346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REStackFrame *fp = resetStack(); 4347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = 0; 4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = startIdx; 4350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Zero out the pattern's static data 4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 
4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i<fPattern->fDataSize; i++) { 4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[i] = 0; 4355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main loop for interpreting the compiled pattern. 4359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One iteration of the loop per pattern operation performed. 4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 436250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opType = URX_TYPE(op); 4364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opValue = URX_VAL(op); 436550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG 4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fTraceDebug) { 436750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 436859d709d503bab6e2b61931737e662dd293b40578ccornelius printf("inputIdx=%ld inputChar=%x sp=%3ld activeLimit=%ld ", fp->fInputIdx, 436950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPattern->dumpOp(fp->fPatIdx); 4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 437250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 4374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (opType) { 
4376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_NOP: 4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKTRACK: 4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Force a backtrack. In some circumstances, the pattern compiler 4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will notice that the pattern can't possibly match anything, and will 4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // emit one of these at that point. 4386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR: 4391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fActiveLimit) { 439250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 4393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == opValue) { 4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 440050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame 
*)fStack->popFrame(fFrameSize); 440150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING: 4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input against a literal string. 4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings require two slots in the compiled pattern, one for the 4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // offset to the string text, and one for the length. 4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringStartIdx = opValue; 4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringLen; 4411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 441250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opType = URX_TYPE(op); 4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringLen = URX_VAL(op); 4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opType == URX_STRING_LEN); 4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(stringLen >= 2); 4418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pInp = inputBuf + fp->fInputIdx; 4420103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UChar * pInpLimit = inputBuf + fActiveLimit; 4421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pPat = litText+stringStartIdx; 4422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pEnd = pInp + stringLen; 
4423103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 4424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while (pInp < pEnd) { 4425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (pInp >= pInpLimit) { 4426103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 4427103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 4428103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 4429103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 4430103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (*pInp++ != *pPat++) { 4431103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 4432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx += stringLen; 443850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 443950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 444050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATE_SAVE: 4446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, opValue, status); 4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
case URX_END: 4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The match loop will exit via this path on a successful match, 4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // when we reach the end of the pattern. 4453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (toEnd && fp->fInputIdx != fActiveLimit) { 4454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The pattern matched, but not to the end of input. Try some more. 4455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isMatch = TRUE; 4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto breakFromLoop; 4460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 446150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start and End Capture stack frame variables are laid out out like this: 4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fp->fExtra[opValue] - The start of a completed capture group 4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // opValue+1 - The end of a completed capture group 4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // opValue+2 - the start of a capture group whose end 4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // has not yet been reached (and might not ever be). 
4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_START_CAPTURE: 4467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue+2] = fp->fInputIdx; 4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END_CAPTURE: 4473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue+1] = fp->fInputIdx; // End position 4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR: // $, test for End of line 448250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or for position before new line at end of input 4483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fAnchorLimit-2) { 4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are no where near the end of input. Fail. 4485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This is the common case. Keep it first. 
4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fAnchorLimit) { 4490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We really are at the end of input. Success. 4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we are positioned just before a new-line that is located at the 4497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // end of input, succeed. 4498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx == fAnchorLimit-1) { 449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c); 4501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 45021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 4503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At new-line at end of input. 
Success 4505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 451050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (fp->fInputIdx == fAnchorLimit-2 && 451150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) { 4512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // At CR/LF at end of input. Success 4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 452250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 4523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fAnchorLimit-1) { 4524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Either at the last character of input, or off the end. 4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx == fAnchorLimit-1) { 4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At last char of input. Success if it's a new line. 
452750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] == 0x0a) { 4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Off the end of input. Success. 4534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Not at end of input. Back-track out. 4541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_M: // $, test for End of line in multi-line mode 454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 454750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 454850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 
454950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 455050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 455150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 455250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 455350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line, succeed. 455450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 455550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = inputBuf[fp->fInputIdx]; 45561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 455750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 455850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In multi-line mode, hitting a new-line just before the end of input does not 455950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // set the hitEnd or requireEnd flags 456050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 4561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 456250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 456350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 456450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // not at a new line. Fail. 
456550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 456650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 456750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 457050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 457150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 457250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 457350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 457450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 457550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; // Java set requireEnd in this case, even though 457650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // adding a new-line would not lose the match. 457750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 457850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are not positioned just before a new-line, the test fails; backtrack out. 457950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 
458050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] != 0x0a) { 458150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 458250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 458350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 458450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 458750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET: // ^, test for start of line 4588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fAnchorStart) { 4589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 459450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M: // ^, test for start of line in mulit-line mode 459550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 459650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx == fAnchorStart) { 459750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 
459850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 459950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 460050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 460150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unless we are at the end of input 4602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = inputBuf[fp->fInputIdx - 1]; 4603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if ((fp->fInputIdx < fAnchorLimit) && 46041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert isLineTerminator(c)) { 460550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It's a new-line. ^ is true. Success. 460650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: what should be done with positions between a CR and LF? 460750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 460850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 460950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Fail. 461050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 461150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 461250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 461550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 461650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 461750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx >= fAnchorStart); 461850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx <= fAnchorStart) { 461950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 
462050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 462150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 462250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 462350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx <= fAnchorLimit); 4624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = inputBuf[fp->fInputIdx - 1]; 462550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != 0x0a) { 462650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Back-track out. 462750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 462850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 462950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 463050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_B: // Test for word boundaries 4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 463450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx); 463554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \B 4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 4637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 
4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool success = isUWordBoundary(fp->fInputIdx); 464654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \B 4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 4648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_D: // Test for decimal digit 4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 466654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius success ^= (UBool)(opValue != 0); // flip sense for \D 466750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 4668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_G: // Test for position at end of previous match 467527f654740f2a26ad62a5c155af9199af9e69b889claireho if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 4676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 46811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_H: // Test for \h, horizontal white space. 
46821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 46831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 46841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 46851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 46871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 46881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c; 46891b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 46901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int8_t ctype = u_charType(c); 46911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UBool success = (ctype == U_SPACE_SEPARATOR || c == 9); // SPACE_SEPARATOR || TAB 46921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert success ^= (UBool)(opValue != 0); // flip sense for \H 46931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (!success) { 46941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 46951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 46961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 46971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 46981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 46991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 47001b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_R: // Test for \R, any line break sequence. 
47011b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 47021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 47031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 47041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 47061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c; 47081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 47101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (c == 0x0d && fp->fInputIdx < fActiveLimit) { 47111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Check for CR/LF sequence. Consume both together when found. 47121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar c2; 47131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c2); 47141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (c2 != 0x0a) { 47151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U16_PREV(inputBuf, 0, fp->fInputIdx, c2); 47161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } else { 47191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 47231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 47241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 
47251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert case URX_BACKSLASH_V: // Any single code point line ending. 47261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 47271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (fp->fInputIdx >= fActiveLimit) { 47281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fHitEnd = TRUE; 47291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 47311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UChar32 c; 47331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 47341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UBool success = isLineTerminator(c); 47351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert success ^= (UBool)(opValue != 0); // flip sense for \V 47361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (!success) { 47371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert fp = (REStackFrame *)fStack->popFrame(fFrameSize); 47381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 47401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert break; 47411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 47421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 47431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 4744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case URX_BACKSLASH_X: 474550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match a Grapheme, as defined by Unicode TR 29. 474650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Differs slightly from Perl, which consumes combining marks independently 474750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of context. 
474850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 4749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fail if at end of input 475150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 475250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 475350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 475450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 475550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Examine (and consume) the current char. 475850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dispatch into a little state machine, based on the char. 475950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 476050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 476150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet **sets = fPattern->fStaticSets; 476250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 476350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 476450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 476550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 476650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 476750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 476850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 476950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
4771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_L: 477450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 477550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 477650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 477750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 477850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 477950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 478050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 478150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_V: 478450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 478550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 478650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 478850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 478950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_T: 479250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 479350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 
479450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 479550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Extend: 479950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Combining characters are consumed here 480050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 480150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 480250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 480450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 480550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 480650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, 0, fp->fInputIdx); 480750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 480850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 480950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 481050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Done; 4811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Control: 4813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Most control chars stand alone (don't combine with combining chars), 481450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // except for that CR/LF sequence is a single grapheme cluster. 
481550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) { 481650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx++; 481750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Done: 482050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 482150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 4822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 482350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 482450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case URX_BACKSLASH_Z: // Test for end of Input 4830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fAnchorLimit) { 4831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATIC_SETREF: 4841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 
4842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input character against one of the predefined sets 4843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Word Characters, for example) 4844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The high bit of the op value is a flag for the match polarity. 4845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 0: success if input char is in set. 4846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1: success if input char is not in set. 4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 4854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opValue &= ~URX_NEG_SET; 4855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 4856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 485750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 4858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 256) { 4860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 4861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c)) { 4862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success = !success; 
4863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet *s = fPattern->fStaticSets[opValue]; 4866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c)) { 4867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success = !success; 4868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 4871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STAT_SETREF_N: 4878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4879fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Test input character for NOT being a member of one of 4880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the predefined sets (Word Characters, for example) 4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4886fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 4888fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 256) { 4892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 4893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c) == FALSE) { 4894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet *s = fPattern->fStaticSets[opValue]; 4898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c) == FALSE) { 4899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_SETREF: 490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 
4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Pick up one char and test it for set membership. 491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4933fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 
493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY: 4941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // . matches anything, but stops at end-of-line. 4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 4945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4949fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is input left. Advance over one char, unless we've hit end-of-line 495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 4952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 49531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if (isLineTerminator(c)) { 4954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of line in normal mode. . does not match. 
495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4960fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4961fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY_ALL: 4963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // . in dot-matches-all (including new lines) mode 4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 4967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is input left. Advance over one char, except if we are 4973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at a cr/lf, advance over both of them. 4974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c; 4975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c==0x0d && fp->fInputIdx < fActiveLimit) { 4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In the case of a CR/LF, we need to advance over both. 
497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] == 0x0a) { 497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit); 4980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4984fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4985fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case URX_DOTANY_UNIX: 4987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // '.' operator, matches all, but stops at end-of-line. 4989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // UNIX_LINES mode, so 0x0a is the only recognized line ending. 4990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 4992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 4997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // There is input left. 
Advance over one char, unless we've hit end-of-line 4998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c; 4999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0x0a) { 5001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // End of line in normal mode. '.' does not match the \n 5002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP: 5009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; 5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_FAIL: 5013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isMatch = FALSE; 5014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto breakFromLoop; 5015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV: 5017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue < fPattern->fCompiledPat->size()); 5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp->fPatIdx = opValue; // Then JMP. 
5020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5021fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV_X: 5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This opcode is used with (x)+, when x can match a zero length string. 5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Same as JMP_SAV, except conditional on the match having made forward progress. 5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 5026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // data address of the input position at the start of the loop. 5027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stoOp = (int32_t)pat[opValue-1]; 5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t frameLoc = URX_VAL(stoOp); 5032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc]; 5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(prevInputIdx <= fp->fInputIdx); 5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevInputIdx < fp->fInputIdx) { 5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The match did make progress. Repeat the loop. 
5037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 5038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; 5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[frameLoc] = fp->fInputIdx; 5040fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 5041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the input position did not advance, we do nothing here, 5042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // execution will fall out of the loop. 5043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5045fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT: 5047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 504959d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 5050fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pick up the three extra operands that CTR_INIT has, and 5052fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // skip the pattern location counter past 505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 3; 5055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 
5058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minCount>=0); 5059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(maxCount>=minCount || maxCount==-1); 506059d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(loopLoc>=fp->fPatIdx); 5061fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (minCount == 0) { 5063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, loopLoc+1, status); 5064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 506559d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 506659d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue+1] = fp->fInputIdx; // For loop breaking. 506759d709d503bab6e2b61931737e662dd293b40578ccornelius } else if (maxCount == 0) { 5068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5072fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP: 5074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 5077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 508050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = 
(int32_t)pat[opValue+3]; 5081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*pCounter)++; 508259d709d503bab6e2b61931737e662dd293b40578ccornelius if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 508359d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(*pCounter == maxCount); 5084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pCounter >= minCount) { 508759d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 508859d709d503bab6e2b61931737e662dd293b40578ccornelius // Loop has no hard upper bound. 508959d709d503bab6e2b61931737e662dd293b40578ccornelius // Check that it is progressing through the input, break if it is not. 509059d709d503bab6e2b61931737e662dd293b40578ccornelius int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 509159d709d503bab6e2b61931737e662dd293b40578ccornelius if (fp->fInputIdx == *pLastInputIdx) { 509259d709d503bab6e2b61931737e662dd293b40578ccornelius break; 509359d709d503bab6e2b61931737e662dd293b40578ccornelius } else { 509459d709d503bab6e2b61931737e662dd293b40578ccornelius *pLastInputIdx = fp->fInputIdx; 509559d709d503bab6e2b61931737e662dd293b40578ccornelius } 509659d709d503bab6e2b61931737e662dd293b40578ccornelius } 5097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue + 4; // Loop back. 
5100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT_NG: 5104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Initialize a non-greedy loop 5106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 510759d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 5108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 510959d709d503bab6e2b61931737e662dd293b40578ccornelius // Pick up the three extra operands that CTR_INIT_NG has, and 5110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // skip the pattern location counter past 511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 3; 5113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 5116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minCount>=0); 5117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(maxCount>=minCount || maxCount==-1); 5118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(loopLoc>fp->fPatIdx); 511959d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 512059d709d503bab6e2b61931737e662dd293b40578ccornelius fp->fExtra[opValue+1] = fp->fInputIdx; // Save initial input index for loop breaking. 
512159d709d503bab6e2b61931737e662dd293b40578ccornelius } 5122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (minCount == 0) { 5124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxCount != 0) { 5125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 5128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 5129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP_NG: 5133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Non-greedy {min, max} loops 5135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 5137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 514159d709d503bab6e2b61931737e662dd293b40578ccornelius 5142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*pCounter)++; 514359d709d503bab6e2b61931737e662dd293b40578ccornelius if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) { 
5144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop has matched the maximum permitted number of times. 5145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Break out of here with no action. Matching will 5146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // continue with the following pattern. 514759d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(*pCounter == maxCount); 5148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pCounter < minCount) { 5152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We haven't met the minimum number of matches yet. 5153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop back for another one. 5154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue + 4; // Loop back. 5155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We do have the minimum number of matches. 515759d709d503bab6e2b61931737e662dd293b40578ccornelius 515859d709d503bab6e2b61931737e662dd293b40578ccornelius // If there is no upper bound on the loop iterations, check that the input index 515959d709d503bab6e2b61931737e662dd293b40578ccornelius // is progressing, and stop the loop if it is not. 
516059d709d503bab6e2b61931737e662dd293b40578ccornelius if (maxCount == -1) { 516159d709d503bab6e2b61931737e662dd293b40578ccornelius int64_t *pLastInputIdx = &fp->fExtra[URX_VAL(initOp) + 1]; 516259d709d503bab6e2b61931737e662dd293b40578ccornelius if (fp->fInputIdx == *pLastInputIdx) { 516359d709d503bab6e2b61931737e662dd293b40578ccornelius break; 516459d709d503bab6e2b61931737e662dd293b40578ccornelius } 516559d709d503bab6e2b61931737e662dd293b40578ccornelius *pLastInputIdx = fp->fInputIdx; 516659d709d503bab6e2b61931737e662dd293b40578ccornelius } 516759d709d503bab6e2b61931737e662dd293b40578ccornelius 516859d709d503bab6e2b61931737e662dd293b40578ccornelius // Loop Continuation: we will fall into the pattern following the loop 516959d709d503bab6e2b61931737e662dd293b40578ccornelius // (non-greedy, don't execute loop body first), but first do 517059d709d503bab6e2b61931737e662dd293b40578ccornelius // a state save to the top of the loop, so that a match failure 5171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the following pattern will try another iteration of the loop. 
5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, opValue + 4, status); 5173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_SP: 5178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 5179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LD_SP: 5183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 518550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(newStackSize <= fStack->size()); 518750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 518850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == (int64_t *)fp) { 5189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<fFrameSize; i++) { 519350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 5194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp = 
(REStackFrame *)newFP; 5196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF: 5201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius { 5202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(opValue < fFrameSize); 5203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int64_t groupStartIdx = fp->fExtra[opValue]; 5204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int64_t groupEndIdx = fp->fExtra[opValue+1]; 5205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(groupStartIdx <= groupEndIdx); 5206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int64_t inputIndex = fp->fInputIdx; 5207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (groupStartIdx < 0) { 5208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // This capture group has not participated in the match thus far, 5209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 
5210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 5213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (int64_t groupIndex = groupStartIdx; groupIndex < groupEndIdx; ++groupIndex,++inputIndex) { 5214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputIndex >= fActiveLimit) { 5215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 5217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5219103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputBuf[groupIndex] != inputBuf[inputIndex]) { 5220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5223103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5224103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 5225103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = inputIndex; 5226103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 5227103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5228103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5229103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF_I: 5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue < fFrameSize); 
523550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupStartIdx = fp->fExtra[opValue]; 523650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupEndIdx = fp->fExtra[opValue+1]; 5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupStartIdx <= groupEndIdx); 5238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (groupStartIdx < 0) { 5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This capture group has not participated in the match thus far, 5240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 5241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUCharIterator captureGroupItr(inputBuf, groupStartIdx, groupEndIdx); 5244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUCharIterator inputItr(inputBuf, fp->fInputIdx, fActiveLimit); 5245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Note: if the capture group match was of an empty string the backref 5247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // match succeeds. Verified by testing: Perl matches succeed 5248103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // in this case, so we do too. 
5249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 5251103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (;;) { 5252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 captureGroupChar = captureGroupItr.next(); 5253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (captureGroupChar == U_SENTINEL) { 5254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = TRUE; 5255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 inputChar = inputItr.next(); 5258103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputChar == U_SENTINEL) { 5259103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5260103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 5261103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputChar != captureGroupChar) { 5264103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5265103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5266103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5267103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5268103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5269103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success && inputItr.inExpansion()) { 5270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We otained a match by consuming part of a string obtained from 5271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // case-folding a single code point of the input text. 5272103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // This does not count as an overall match. 
5273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5275103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5276103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 5277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = inputItr.getIndex(); 5278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5283103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_INP_LOC: 5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize); 5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = fp->fInputIdx; 5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMPX: 5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 529350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 1; 5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 5296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize); 529750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t 
savedInputIdx = (int32_t)fp->fExtra[dataLoc]; 5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(savedInputIdx <= fp->fInputIdx); 5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (savedInputIdx < fp->fInputIdx) { 5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; // JMP 5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 530250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_START: 5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Entering a lookahead block. 5310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save Stack Ptr, Input Pos. 5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+1] = fp->fInputIdx; 5314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = fLookStart; // Set the match region change for 5315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fLookLimit; // transparent bounds. 
5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_END: 5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Leaving a look-ahead block. 5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // restore Stack Ptr, Input Pos to positions they had on entry to block. 5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackSize = fStack->size(); 532550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(stackSize >= newStackSize); 5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stackSize > newStackSize) { 5328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Copy the current top frame back to the new (cut back) top frame. 5329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This makes the capture groups from within the look-ahead 5330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // expression available. 
533150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 5333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<fFrameSize; i++) { 533450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp = (REStackFrame *)newFP; 5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = fData[opValue+1]; 5340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restore the active region bounds in the input string; they may have 5342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // been changed because of transparent bounds on a Region. 
5343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = fRegionStart; 5344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fRegionLimit; 5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR_I: 5349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fActiveLimit) { 5350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c; 5351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING_I: 5362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Case-insensitive test input against a literal string. 5363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Strings require two slots in the compiled pattern, one for the 5364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // offset to the string text, and one for the length. 
5365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // The compiled string has already been case folded. 5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UChar *patternString = litText + opValue; 5368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius op = (int32_t)pat[fp->fPatIdx]; 5370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fPatIdx++; 5371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius opType = URX_TYPE(op); 5372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius opValue = URX_VAL(op); 5373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(opType == URX_STRING_LEN); 5374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t patternStringLen = opValue; // Length of the string from the pattern. 5375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 cText; 5377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 cPattern; 5378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool success = TRUE; 5379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t patternStringIdx = 0; 5380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius CaseFoldingUCharIterator inputIterator(inputBuf, fp->fInputIdx, fActiveLimit); 5381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while (patternStringIdx < patternStringLen) { 5382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern); 5383103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius cText = inputIterator.next(); 5384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (cText != cPattern) { 5385103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (cText == 
U_SENTINEL) { 5387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fHitEnd = TRUE; 538850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius break; 5390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (inputIterator.inExpansion()) { 5393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius success = FALSE; 5394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (success) { 5397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp->fInputIdx = inputIterator.getIndex(); 5398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else { 5399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } 5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5403103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_START: 5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Entering a look-behind block. 5407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save Stack Ptr, Input Pos. 5408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: implement transparent bounds. 
Ticket #6067 5409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+1] = fp->fInputIdx; 5412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Init the variable containing the start index for attempted matches. 5413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+2] = -1; 5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save input string length, then reset to pin any matches to end at 5415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the current position. 5416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fData[opValue+3] = fActiveLimit; 5417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fp->fInputIdx; 5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_CONT: 5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Positive Look-Behind, at top of loop checking for matches of LB expression 5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at all possible input starting positions. 5426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch the min and max possible match lengths. They are the operands 5428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of this op in the pattern. 
542950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 543050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 5431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML <= maxML); 5432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML >= 0); 5433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch (from data) the last input index where a match was attempted. 5435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 543650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 5437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0) { 5438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First time through loop. 5439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *lbStartIdx = fp->fInputIdx - minML; 5440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2nd through nth time through the loop. 5442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up start position for match by one. 
5443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx == 0) { 544450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, *lbStartIdx); 5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have tried all potential match starting points without 5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // getting a match. Backtrack out, and out of the 5453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look Behind altogether. 5454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 545550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(restoreInputLen >= fActiveLimit); 545750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = restoreInputLen; 5459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (successful match will fall off the end of the loop.) 
5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-3, status); 5465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = *lbStartIdx; 5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_END: 5470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of a look-behind block, after a successful match. 5471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fActiveLimit) { 5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The look-behind expression matched, but the match did not 5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // extend all the way to the point that we are looking behind from. 5476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // FAIL out of here, which will take us back to the LB_CONT, which 5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will retry the match starting at another position or fail 5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the look-behind altogether, whichever is appropriate. 5479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look-behind match is good. 
Restore the orignal input string length, 5484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // which had been truncated to pin the end of the lookbehind match to the 5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // position being looked-behind. 548650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(originalInputLen >= fActiveLimit); 548850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 5489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = originalInputLen; 5490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_CONT: 5495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Negative Look-Behind, at top of loop checking for matches of LB expression 5497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at all possible input starting positions. 5498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch the extra parameters of this op. 
550050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 550150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 550250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 550350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continueLoc = URX_VAL(continueLoc); 5504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML <= maxML); 5505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML >= 0); 5506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(continueLoc > fp->fPatIdx); 5507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch (from data) the last input index where a match was attempted. 5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 551050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0) { 5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First time through loop. 5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *lbStartIdx = fp->fInputIdx - minML; 5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2nd through nth time through the loop. 5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up start position for match by one. 5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx == 0) { 5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*lbStartIdx)--; // Because U16_BACK is unsafe starting at 0. 
5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, *lbStartIdx); 5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have tried all potential match starting points without 5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // getting a match, which means that the negative lookbehind as 5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a whole has succeeded. Jump forward to the continue location 552850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 5529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(restoreInputLen >= fActiveLimit); 553050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 5531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = restoreInputLen; 5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = continueLoc; 5533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 5537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (successful match will cause a FAIL out of the loop altogether.) 
5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-4, status); 5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = *lbStartIdx; 5540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_END: 5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of a negative look-behind block, after a successful match. 5545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fActiveLimit) { 5548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The look-behind expression matched, but the match did not 5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // extend all the way to the point that we are looking behind from. 5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // FAIL out of here, which will take us back to the LB_CONT, which 5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will retry the match starting at another position or succeed 5552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the look-behind altogether, whichever is appropriate. 
5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look-behind expression matched, which means look-behind test as 5558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a whole Fails 5559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Restore the orignal input string length, which had been truncated 5561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // inorder to pin the end of the lookbehind match 5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to the position being looked-behind. 556350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(originalInputLen >= fActiveLimit); 556550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = originalInputLen; 5567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restore original stack position, discarding any state saved 5569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by the successful pattern match. 
5570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 557150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fStack->size() > newStackSize); 5573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // FAIL, which will take control back to someplace 5576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // prior to entering the look-behind test. 5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_SR_I: 5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop Initialization for the optimized implementation of 5584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [some character set]* 5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This op scans through all matching input. 5586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following LOOP_C op emulates stack unwinding if the following pattern fails. 
5587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < sets->size()); 5589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 5590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 5591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop through input, until either the input is exhausted or 5593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we reach a character that is not a member of the set. 559450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t ix = (int32_t)fp->fInputIdx; 5595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ix >= fActiveLimit) { 5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 5601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, ix, fActiveLimit, c); 5602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c<256) { 5603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c) == FALSE) { 5604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, ix); 5605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c) == FALSE) { 5609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
U16_BACK_1(inputBuf, 0, ix); 5610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there were no matching characters, skip over the loop altogether. 5616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop doesn't run at all, a * op always succeeds. 5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ix == fp->fInputIdx) { 5618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; // skip the URX_LOOP_C op. 5619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Peek ahead in the compiled pattern, to the URX_LOOP_C that 5623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // must follow. 
It's operand is the stack location 5624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that holds the starting input index for the match of this [set]* 562550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 5626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 5627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackLoc = URX_VAL(loopcOp); 5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 5629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[stackLoc] = fp->fInputIdx; 5630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = ix; 5631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save State to the URX_LOOP_C op that follows this one, 5633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that match failures in the following code will return to there. 5634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 5635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 5637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_DOT_I: 5642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop Initialization for the optimized implementation of .* 5643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This op scans through all remaining input. 
5644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following LOOP_C op emulates stack unwinding if the following pattern fails. 5645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop through input until the input is exhausted (we reach an end-of-line) 5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // In DOTALL mode, we can just go straight to the end of the input. 5648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ix; 5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((opValue & 1) == 1) { 5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Dot-matches-All mode. Jump straight to the end of the string. 565150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = (int32_t)fActiveLimit; 5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // NOT DOT ALL mode. Line endings do not match '.' 5655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan forward until a line ending or end of input. 
565650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = (int32_t)fp->fInputIdx; 5657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ix >= fActiveLimit) { 5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 5663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, ix, fActiveLimit, c); // c = inputBuf[ix++] 566427f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 566527f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c == 0x0a) || // 0x0a is newline in both modes. 566627f654740f2a26ad62a5c155af9199af9e69b889claireho (((opValue & 2) == 0) && // IF not UNIX_LINES mode 56671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert isLineTerminator(c))) { 5668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // char is a line ending. Put the input pos back to the 5669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // line ending char, and exit the scanning loop. 5670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, ix); 5671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there were no matching characters, skip over the loop altogether. 
5678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop doesn't run at all, a * op always succeeds. 5679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ix == fp->fInputIdx) { 5680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; // skip the URX_LOOP_C op. 5681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Peek ahead in the compiled pattern, to the URX_LOOP_C that 5685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // must follow. It's operand is the stack location 5686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that holds the starting input index for the match of this .* 568750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 5688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 5689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackLoc = URX_VAL(loopcOp); 5690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 5691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[stackLoc] = fp->fInputIdx; 5692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = ix; 5693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save State to the URX_LOOP_C op that follows this one, 5695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that match failures in the following code will return to there. 5696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
5697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 5699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_C: 5704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue<fFrameSize); 570650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = (int32_t)fp->fExtra[opValue]; 570750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(backSearchIndex <= fp->fInputIdx); 570850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (backSearchIndex == fp->fInputIdx) { 5709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've backed up the input idx to the point that the loop started. 5710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The loop is done. Leave here without saving state. 5711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Subsequent failures won't come back here. 5712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set up for the next iteration of the loop, with input index 5715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // backed up by one from the last time through, 5716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and a state save to this instruction in case the following code fails again. 
5717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (We're going backwards because this loop emulates stack unwinding, not 5718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the initial scan forward.) 5719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fInputIdx > 0); 572050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevC; 572150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit? 5722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (prevC == 0x0a && 572450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx > backSearchIndex && 5725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru inputBuf[fp->fInputIdx-1] == 0x0d) { 572650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 5727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 5728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // .*, stepping back over CRLF pair. 
572950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, 0, fp->fInputIdx); 5730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-1, status); 5735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 5741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Trouble. The compiled pattern contains an entry with an 5742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unrecognized type tag. 
5743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 5744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 5747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru isMatch = FALSE; 5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerubreakFromLoop: 5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = isMatch; 5754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isMatch) { 5755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLastMatchEnd = fMatchEnd; 5756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatchStart = startIdx; 5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatchEnd = fp->fInputIdx; 5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG 5761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (fTraceDebug) { 5762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (isMatch) { 5763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("Match. 
start=%ld end=%ld\n\n", fMatchStart, fMatchEnd); 5764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 5765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius printf("No match\n\n"); 5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 5769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrame = fp; // The active stack frame when the engine stopped. 5771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Contains the capture group results that we need to 5772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // access later. 5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) 5779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 5783