1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2002-2011 International Business Machines Corporation * 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and others. All rights reserved. * 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 8c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// file: rematch.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Contains the implementation of class RegexMatcher, 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// which is one of the main API classes for the ICU regular expression package. 
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h" 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h" 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "regextxt.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "ucase.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #include <malloc.h> // Needed for heapcheck testing 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3527f654740f2a26ad62a5c155af9199af9e69b889claireho// Find progress callback 3627f654740f2a26ad62a5c155af9199af9e69b889claireho// ---------------------- 
3727f654740f2a26ad62a5c155af9199af9e69b889claireho// Macro to inline test & call to ReportFindProgress(). Eliminates unnecessary function call. 3827f654740f2a26ad62a5c155af9199af9e69b889claireho// 3927f654740f2a26ad62a5c155af9199af9e69b889claireho#define REGEXFINDPROGRESS_INTERRUPT(pos, status) \ 4027f654740f2a26ad62a5c155af9199af9e69b889claireho (fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE) 4127f654740f2a26ad62a5c155af9199af9e69b889claireho 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Smart Backtracking 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// ------------------ 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// When a failure would go back to a LOOP_C instruction, 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// strings, characters, and setrefs scan backwards for a valid start 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// character themselves, pop the stack, and save state, emulating the 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// LOOP_C's effect but assured that the next character of input is a 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// possible matching character. 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Good idea in theory; unfortunately it only helps out a few specific 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// cases and slows the engine down a little in the rest. 
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//#define REGEX_SMART_BACKTRACKING 1 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Default limit for the size of the back track stack, to avoid system 59c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// failures caused by heap exhaustion. Units are in 32 bit words, not bytes. 60c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This value puts ICU's limits higher than most other regexp implementations, 61c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// which use recursion rather than the heap, and take more storage per 62c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// backtrack point. 63c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 64c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000; 65c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 66c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Time limit counter constant. 67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Time limits for expression evaluation are in terms of quanta of work by 68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// the engine, each of which is 10,000 state saves. 69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This constant determines that state saves per tick number. 
70c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic const int32_t TIMER_INITIAL_VALUE = 10000; 71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------- 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructor and Destructor 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------- 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const RegexPattern *pat) { 78c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = U_ZERO_ERROR; 79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(fDeferredStatus); 80c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 81c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 82c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat==NULL) { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 87c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = pat; 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus); 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString ®exp, const UnicodeString &input, 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status) { 95c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(status); 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UParseError pe; 100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = fPatternOwned; 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&inputText, &input, &status); 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(&inputText, status); 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputUniStrMaybeMutable = TRUE; 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText *regexp, UText *input, 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, UErrorCode &status) { 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(status); 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = fPatternOwned; 125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init2(input, status); 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString ®exp, 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t flags, UErrorCode &status) { 131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru init(status); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UParseError pe; 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern = fPatternOwned; 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, status); 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText *regexp, 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint32_t flags, UErrorCode &status) { 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(status); 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UParseError pe; 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPatternOwned = RegexPattern::compile(regexp, flags, pe, status); 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = fPatternOwned; 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init2(RegexStaticSets::gStaticSets->fEmptyText, status); 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::~RegexMatcher() { 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fStack; 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fData != fSmallData) { 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(fData); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData = NULL; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fPatternOwned) { 
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fPatternOwned; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPatternOwned = NULL; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPattern = NULL; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInput) { 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputText) { 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fInputText); 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fAltInputText) { 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(fAltInputText); 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #if UCONFIG_NO_BREAK_ITERATION==0 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fWordBreakItr; 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru #endif 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// init() common initialization for use by all constructors. 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Initialize all fields, get the object into a consistent state. 
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This must be done even when the initial status shows an error, 194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// so that the object is initialized sufficiently well for the destructor 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// to run safely. 196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexMatcher::init(UErrorCode &status) { 198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPattern = NULL; 199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fPatternOwned = NULL; 200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrameSize = 0; 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRegionStart = 0; 202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRegionLimit = 0; 203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchorStart = 0; 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchorLimit = 0; 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLookStart = 0; 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLookLimit = 0; 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = 0; 208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = 0; 209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTransparentBounds = FALSE; 210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAnchoringBounds = TRUE; 211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatchStart = 0; 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatchEnd = 0; 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fLastMatchEnd = -1; 215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAppendPosition = 0; 
216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = FALSE; 217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = FALSE; 218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStack = NULL; 219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrame = NULL; 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTimeLimit = 0; 221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTime = 0; 222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTickCounter = 0; 223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fStackLimit = DEFAULT_BACKTRACK_STACK_CAPACITY; 224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCallbackFn = NULL; 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fCallbackContext = NULL; 22627f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackFn = NULL; 22727f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackContext = NULL; 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fTraceDebug = FALSE; 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fData = fSmallData; 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fWordBreakItr = NULL; 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack = new UVector64(status); 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = NULL; 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAltInputText = NULL; 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = 0; 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputUniStrMaybeMutable = FALSE; 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if 
(U_FAILURE(status)) { 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// init2() Common initialization for use by RegexMatcher constructors, part 2. 247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// This handles the common setup to be done after the Pattern is available. 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::init2(UText *input, UErrorCode &status) { 250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) { 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t)); 257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fData == NULL) { 258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reset(input); 
264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status); 265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fDeferredStatus = status; 267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar BACKSLASH = 0x5c; 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar DOLLARSIGN = 0x24; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// appendReplacement 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest, 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &replacement, 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &dest, &status); 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(&resultText, &replacementText, status); 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendReplacement, UText mode 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::appendReplacement(UText *dest, 30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *replacement, 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(fMatch == FALSE) { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_REGEX_INVALID_STATE; 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy input string from the end of previous match to start of current match 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t destLen = utext_nativeLength(dest); 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatchStart > fAppendPosition) { 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 32127f654740f2a26ad62a5c155af9199af9e69b889claireho destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 32227f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fMatchStart-fAppendPosition), &status); 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(fMatchStart-fAppendPosition); 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus); 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 33227f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 33327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 33427f654740f2a26ad62a5c155af9199af9e69b889claireho return 
*this; 33527f654740f2a26ad62a5c155af9199af9e69b889claireho } 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status); 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status); 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(inputChars); 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fAppendPosition = fMatchEnd; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // scan the replacement text, looking for substitutions ($n) and \escapes. 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: optimize this loop by efficiently scanning for '$' or '\', 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // move entire ranges not containing substitutions. 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(replacement, 0); 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(replacement); 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == BACKSLASH) { 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Backslash Escape. Copy the following char out without further checks. 
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: Surrogate pairs don't need any special handling 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The second half wont be a '$' or a '\', and 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will move to the dest normally on the next 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // loop iteration. 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_CURRENT32(replacement); 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c==0x55/*U*/ || c==0x75/*u*/) { 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have a \udddd or \Udddddddd escape sequence. 36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t offset = 0; 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement); 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context); 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapedChar != (UChar32)0xFFFFFFFF) { 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(escapedChar)) { 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)escapedChar; 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(escapedChar); 
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(escapedChar); 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: Report errors for mal-formed \u escapes? 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // As this is, the original sequence is output, which may be OK. 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (context.lastOffset == offset) { 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(replacement); 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (context.lastOffset != offset-1) { 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_moveIndex32(replacement, offset - context.lastOffset - 1); 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(replacement); 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Plain backslash escape. Just put out the escaped character. 
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)c; 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(c); 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(c); 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c != DOLLARSIGN) { 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normal char, not a $. Copy it out without further checks. 
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = (UChar)c; 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar surrogate[2]; 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[0] = U16_LEAD(c); 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho surrogate[1] = U16_TRAIL(c); 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status); 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We've got a $. Pick up a capture group number if one follows. 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Consume at most the number of digits necessary for the largest capture 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // number that is valid for this pattern. 
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numDigits = 0; 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum = 0; 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 digitC; 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho digitC = UTEXT_CURRENT32(replacement); 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (digitC == U_SENTINEL) { 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_isdigit(digitC) == FALSE) { 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(replacement); 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho groupNum=groupNum*10 + u_charDigitValue(digitC); 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numDigits++; 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (numDigits >= fPattern->fMaxCaptureDigits) { 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (numDigits == 0) { 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The $ didn't introduce a group number at all. 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Treat it as just part of the substitution text. 
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c16 = DOLLARSIGN; 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status); 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Finally, append the capture group data to the destination. 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destLen += appendGroup(groupNum, dest, status); 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Can fail if group number is out of range. 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(replacement); 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// appendTail Intended to be used in conjunction with appendReplacement() 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To the destination string, append everything following 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the last match position from the input string. 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// Note: Match ranges do not affect appendTail or appendReplacement 473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) { 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &dest, &status); 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 48127f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(&resultText, status); 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendTail, UText mode 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 49127f654740f2a26ad62a5c155af9199af9e69b889clairehoUText 
*RegexMatcher::appendTail(UText *dest, UErrorCode &status) { 49227f654740f2a26ad62a5c155af9199af9e69b889claireho UBool bailOut = FALSE; 49327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 49427f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 49527f654740f2a26ad62a5c155af9199af9e69b889claireho } 49627f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 49727f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 49827f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 49927f654740f2a26ad62a5c155af9199af9e69b889claireho } 50027f654740f2a26ad62a5c155af9199af9e69b889claireho 50127f654740f2a26ad62a5c155af9199af9e69b889claireho if (bailOut) { 50227f654740f2a26ad62a5c155af9199af9e69b889claireho // dest must not be NULL 50327f654740f2a26ad62a5c155af9199af9e69b889claireho if (dest) { 50427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest, utext_nativeLength(dest), utext_nativeLength(dest), NULL, 0, &status); 50527f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 50627f654740f2a26ad62a5c155af9199af9e69b889claireho } 50727f654740f2a26ad62a5c155af9199af9e69b889claireho } 50827f654740f2a26ad62a5c155af9199af9e69b889claireho 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputLength > fAppendPosition) { 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t destLen = utext_nativeLength(dest); 51227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition, 51327f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fInputLength-fAppendPosition), &status); 51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(fInputLength-fAppendPosition); 51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status); 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // buffer overflow 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16)); 52427f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 52527f654740f2a26ad62a5c155af9199af9e69b889claireho fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; 52627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 52727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated 52827f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t destLen = utext_nativeLength(dest); 52927f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest, destLen, destLen, inputChars, len16, &status); 53027f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(inputChars); 53127f654740f2a26ad62a5c155af9199af9e69b889claireho } 53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru// 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// end 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexMatcher::end(UErrorCode &err) const { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return end(0, err); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54827f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(UErrorCode &err) const { 54927f654740f2a26ad62a5c155af9199af9e69b889claireho return end64(0, err); 55027f654740f2a26ad62a5c155af9199af9e69b889claireho} 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55227f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const { 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch == FALSE) { 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_REGEX_INVALID_STATE; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (group < 0 || group > fPattern->fGroupMap->size()) { 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_INDEX_OUTOFBOUNDS_ERROR; 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t e = -1; 
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (group == 0) { 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru e = fMatchEnd; 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get the position within the stack frame of the variables for 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this capture group. 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupOffset < fPattern->fFrameSize); 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupOffset >= 0); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru e = fFrame->fExtra[groupOffset + 1]; 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 57627f654740f2a26ad62a5c155af9199af9e69b889claireho return e; 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57927f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const { 58027f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)end64(group, err); 58127f654740f2a26ad62a5c155af9199af9e69b889claireho} 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// find() 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru//-------------------------------------------------------------------------------- 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexMatcher::find() { 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Start at the position of the last match end. (Will be zero if the 59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher has been reset.) 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return findUsingChunk(); 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t startPos = fMatchEnd; 602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos==0) { 603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru startPos = fActiveStart; 604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the position of any previous successful match. 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLastMatchEnd = fMatchEnd; 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatchStart == fMatchEnd) { 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Previous match had zero length. 
Move start position up one position 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to avoid sending find() into a loop on zero-length matches. 613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos >= fActiveLimit) { 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 619b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fLastMatchEnd >= 0) { 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A previous find() failed to match. Don't try again. 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (without this test, a pattern with a zero-length match 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // could match again at the end of an input string.) 
627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compute the position in the input string beyond which a match can not begin, because 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the minimum length match would extend past the end of the input. 635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Be aware of possible overflows if making changes here. 63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t testStartLimit; 63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testStartLimit = fActiveLimit - fPattern->fMinMatchLen; 64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > testStartLimit) { 64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For now, let the matcher discover that it can't match on its own 64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We don't know how long the match len is in native characters 64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho testStartLimit = fActiveLimit; 
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(startPos >= 0); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (fPattern->fStartType) { 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_NO_INFO: 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No optimization was found. 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try a match at each input position. 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru MatchAt(startPos, FALSE, fDeferredStatus); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note that it's perfectly OK for a pattern to have a zero-length 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testStartLimit the last time through. 67627f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 67727f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_START: 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Matches are only possible at the start of the input string 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (pattern begins with ^ or \A) 684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos > fActiveStart) { 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru MatchAt(startPos, FALSE, fDeferredStatus); 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fMatch; 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 
START_SET: 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match may start on any char from a pre-computed set. 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fPattern->fMinMatchLen > 0); 69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t pos; 70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pos = UTEXT_GETNATIVEINDEX(fInputText); 70450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // c will be -1 (U_SENTINEL) at end of text, in which case we 70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip this next block (so we don't have a negative array index) 70650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and handle end of text in the following block. 
70727f654740f2a26ad62a5c155af9199af9e69b889claireho if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) || 70827f654740f2a26ad62a5c155af9199af9e69b889claireho (c>=256 && fPattern->fInitialChars->contains(c)))) { 70950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(startPos, FALSE, fDeferredStatus); 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = pos; 72427f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 72527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_STRING: 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_CHAR: 
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match starts on exactly one char. 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fPattern->fMinMatchLen > 0); 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 theChar = fPattern->fInitialChar; 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t pos; 73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pos = UTEXT_GETNATIVEINDEX(fInputText); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == theChar) { 74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(startPos, FALSE, fDeferredStatus); 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = FALSE; 753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = pos; 75727f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 75827f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case START_LINE: 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (startPos == fAnchorStart) { 767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru MatchAt(startPos, FALSE, fDeferredStatus); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fMatch) { 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fPattern->fFlags & UREGEX_UNIX_LINES) { 78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0x0a) { 786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru MatchAt(startPos, FALSE, fDeferredStatus); 787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fMatch) { 791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note that it's perfectly OK for a pattern to have a zero-length 803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with 
startPos == testStartLimit the last time through. 80527f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 80627f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (;;) { 810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) { 81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru MatchAt(startPos, FALSE, fDeferredStatus); 817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fMatch) { 821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return TRUE; 822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, startPos); 824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testStartLimit) { 826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fMatch = FALSE; 
827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = UTEXT_GETNATIVEINDEX(fInputText); 832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Note that it's perfectly OK for a pattern to have a zero-length 833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // match at the end of a string, so we must make sure that the loop 83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testStartLimit the last time through. 83527f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 83627f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::find(int64_t start, 
UErrorCode &status) { 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru this->reset(); // Note: Reset() is specified by Java Matcher documentation. 860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This will reset the region to be the full input length. 86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 86627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeStart = start; 86727f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchEnd = nativeStart; 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return find(); 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// findUsingChunk() -- like find(), but with the advance knowledge that the 87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// entire string is available in the UText's chunk buffer. 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 88250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::findUsingChunk() { 88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start at the position of the last match end. (Will be zero if the 88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // matcher has been reset. 88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t startPos = (int32_t)fMatchEnd; 88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos==0) { 88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos = (int32_t)fActiveStart; 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save the position of any previous successful match. 
89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = fMatchEnd; 89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatchStart == fMatchEnd) { 89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Previous match had zero length. Move start position up one position 90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to avoid sending find() into a loop on zero-length matches. 90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= fActiveLimit) { 90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fInputLength); 90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fLastMatchEnd >= 0) { 91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A previous find() failed to match. Don't try again. 91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (without this test, a pattern with a zero-length match 91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // could match again at the end of an input string.) 
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compute the position in the input string beyond which a match can not begin, because 92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the minimum length match would extend past the end of the input. 92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note: some patterns that cannot match anything will have fMinMatchLength==Max Int. 92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Be aware of possible overflows if making changes here. 92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t testLen = (int32_t)(fActiveLimit - fPattern->fMinMatchLen); 92450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > testLen) { 92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 92950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 93150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(startPos >= 0); 93250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (fPattern->fStartType) { 93450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_NO_INFO: 93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // No optimization was found. 93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try a match at each input position. 
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(startPos, FALSE, fDeferredStatus); 93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testLen) { 94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 
95327f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 95427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_START: 95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Matches are only possible at the start of the input string 96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (pattern begins with ^ or \A) 96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos > fActiveStart) { 96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(startPos, FALSE, fDeferredStatus); 96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_SET: 97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match may start on any char from a pre-computed set. 
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fPattern->fMinMatchLen > 0); 97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pos = startPos; 97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 97927f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c<256 && fPattern->fInitialChars8->contains(c)) || 98027f654740f2a26ad62a5c155af9199af9e69b889claireho (c>=256 && fPattern->fInitialChars->contains(c))) { 98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(pos, FALSE, fDeferredStatus); 98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= testLen) { 99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 99427f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 99527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_STRING: 100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 
START_CHAR: 100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match starts on exactly one char. 100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fPattern->fMinMatchLen > 0); 100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 theChar = fPattern->fInitialChar; 100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t pos = startPos; 100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, startPos, fActiveLimit, c); // like c = inputBuf[startPos++]; 100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == theChar) { 101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(pos, FALSE, fDeferredStatus); 101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= testLen) { 101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 102327f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 102427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case START_LINE: 103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos == fAnchorStart) { 103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(startPos, FALSE, fDeferredStatus); 103450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fFlags & UREGEX_UNIX_LINES) { 104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = inputBuf[startPos-1]; 104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a) { 104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(startPos, FALSE, fDeferredStatus); 104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testLen) { 105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 
105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 106427f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 106527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = inputBuf[startPos-1]; 107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) { 107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { 107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho startPos++; 107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt(startPos, FALSE, fDeferredStatus); 107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch) { 
108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return TRUE; 108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (startPos >= testLen) { 108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, startPos, fActiveLimit); 108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Note that it's perfectly OK for a pattern to have a zero-length 109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // match at the end of a string, so we must make sure that the loop 109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // runs with startPos == testLen the last time through. 109227f654740f2a26ad62a5c155af9199af9e69b889claireho if (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus)) 109327f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho default: 109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 110350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// group() 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 111350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(UErrorCode &status) const { 111450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return group(0, status); 111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 111650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 111727f654740f2a26ad62a5c155af9199af9e69b889claireho// Return immutable shallow clone 111827f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const { 111927f654740f2a26ad62a5c155af9199af9e69b889claireho return group(0, dest, group_len, status); 112050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 112150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 112227f654740f2a26ad62a5c155af9199af9e69b889claireho// Return immutable shallow clone 112327f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const { 112427f654740f2a26ad62a5c155af9199af9e69b889claireho group_len = 0; 112527f654740f2a26ad62a5c155af9199af9e69b889claireho UBool bailOut = FALSE; 112627f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 112727f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 112827f654740f2a26ad62a5c155af9199af9e69b889claireho } 112927f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 
113027f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 113127f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 113227f654740f2a26ad62a5c155af9199af9e69b889claireho } 113327f654740f2a26ad62a5c155af9199af9e69b889claireho if (fMatch == FALSE) { 113427f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_REGEX_INVALID_STATE; 113527f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 113627f654740f2a26ad62a5c155af9199af9e69b889claireho } 113727f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 113827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 113927f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 114027f654740f2a26ad62a5c155af9199af9e69b889claireho } 114127f654740f2a26ad62a5c155af9199af9e69b889claireho 114227f654740f2a26ad62a5c155af9199af9e69b889claireho if (bailOut) { 114327f654740f2a26ad62a5c155af9199af9e69b889claireho return (dest) ? 
dest : utext_openUChars(NULL, NULL, 0, &status); 114427f654740f2a26ad62a5c155af9199af9e69b889claireho } 114527f654740f2a26ad62a5c155af9199af9e69b889claireho 114627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t s, e; 114727f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupNum == 0) { 114827f654740f2a26ad62a5c155af9199af9e69b889claireho s = fMatchStart; 114927f654740f2a26ad62a5c155af9199af9e69b889claireho e = fMatchEnd; 115027f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 115127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 115227f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 115327f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(groupOffset >= 0); 115427f654740f2a26ad62a5c155af9199af9e69b889claireho s = fFrame->fExtra[groupOffset]; 115527f654740f2a26ad62a5c155af9199af9e69b889claireho e = fFrame->fExtra[groupOffset+1]; 115627f654740f2a26ad62a5c155af9199af9e69b889claireho } 115750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 115827f654740f2a26ad62a5c155af9199af9e69b889claireho if (s < 0) { 115927f654740f2a26ad62a5c155af9199af9e69b889claireho // A capture group wasn't part of the match 116027f654740f2a26ad62a5c155af9199af9e69b889claireho return utext_clone(dest, fInputText, FALSE, TRUE, &status); 116127f654740f2a26ad62a5c155af9199af9e69b889claireho } 116227f654740f2a26ad62a5c155af9199af9e69b889claireho U_ASSERT(s <= e); 116327f654740f2a26ad62a5c155af9199af9e69b889claireho group_len = e - s; 116427f654740f2a26ad62a5c155af9199af9e69b889claireho 116527f654740f2a26ad62a5c155af9199af9e69b889claireho dest = utext_clone(dest, fInputText, FALSE, TRUE, &status); 116627f654740f2a26ad62a5c155af9199af9e69b889claireho if (dest) 116727f654740f2a26ad62a5c155af9199af9e69b889claireho UTEXT_SETNATIVEINDEX(dest, s); 116827f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 116927f654740f2a26ad62a5c155af9199af9e69b889claireho} 
117050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 117150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const { 117250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 117327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 117427f654740f2a26ad62a5c155af9199af9e69b889claireho return result; 117527f654740f2a26ad62a5c155af9199af9e69b889claireho } 117650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 117750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &result, &status); 117850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho group(groupNum, &resultText, status); 117950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 118050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 118150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 118250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 118350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 118427f654740f2a26ad62a5c155af9199af9e69b889claireho// Return deep (mutable) clone 118527f654740f2a26ad62a5c155af9199af9e69b889claireho// Technology Preview (as an API), but note that the UnicodeString API is implemented 118627f654740f2a26ad62a5c155af9199af9e69b889claireho// using this function. 
118750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const { 118850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool bailOut = FALSE; 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 119027f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 119450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bailOut = TRUE; 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 119650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 119750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch == FALSE) { 119850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_STATE; 119950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bailOut = TRUE; 120050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 120150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 120350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho bailOut = TRUE; 120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 120550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 120650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (bailOut) { 120750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 120850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 120950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 121050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 121150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_openUChars(NULL, NULL, 0, &status); 
121250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 121350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 121450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 121550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t s, e; 121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum == 0) { 121750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fMatchStart; 121850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fMatchEnd; 121950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 122050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 122150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset >= 0); 122350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fFrame->fExtra[groupOffset]; 122450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fFrame->fExtra[groupOffset+1]; 122550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 122750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s < 0) { 122850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A capture group wasn't part of the match 122950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_openUChars(NULL, NULL, 0, &status); 123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(s <= e); 123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, 
fInputLength)) { 123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(e <= fInputLength); 124050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 124150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status); 124250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 124350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText groupText = UTEXT_INITIALIZER; 124450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status); 124550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status); 124650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&groupText); 124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(e-s); 125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); 125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 125727f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupChars == NULL) { 125827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 125927f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 126027f654740f2a26ad62a5c155af9199af9e69b889claireho } 126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, s, e, groupChars, len16+1, &status); 
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status); 126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText groupText = UTEXT_INITIALIZER; 126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&groupText, groupChars, len16, &status); 126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status); 126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&groupText); 127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(groupChars); 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// appendGroup() -- currently internal only, appends a group to a UText rather 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// than replacing its contents 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const { 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(U_FAILURE(status)) { 128627f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 129027f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 129227f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t destLen = utext_nativeLength(dest); 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch == FALSE) { 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_INVALID_STATE; 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) { 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t s, e; 130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupNum == 0) { 130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fMatchStart; 130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fMatchEnd; 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1); 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset >= 0); 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fFrame->fExtra[groupOffset]; 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho e = fFrame->fExtra[groupOffset+1]; 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s < 0) { 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // A capture group wasn't part of the match 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_replace(dest, destLen, destLen, NULL, 0, &status); 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(s <= e); 132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t deltaLen; 132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(e <= fInputLength); 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status); 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)(e-s); 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus); 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1)); 
133427f654740f2a26ad62a5c155af9199af9e69b889claireho if (groupChars == NULL) { 133527f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 133627f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 133727f654740f2a26ad62a5c155af9199af9e69b889claireho } 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, s, e, groupChars, len16+1, &status); 133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status); 134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(groupChars); 134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return deltaLen; 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// groupCount() 1351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::groupCount() const { 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fPattern->fGroupMap->size(); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasAnchoringBounds() 1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasAnchoringBounds() const { 136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fAnchoringBounds; 1366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hasTransparentBounds() 1372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasTransparentBounds() const { 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fTransparentBounds; 1376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 
1381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// hitEnd() 1383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 138550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hitEnd() const { 138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fHitEnd; 1387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// input() 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UnicodeString &RegexMatcher::input() const { 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!fInput) { 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t len16; 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = (int32_t)fInputLength; 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status); 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; // overflow, length status 
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *result = new UnicodeString(len16, 0, 0); 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = result->getBuffer(len16); 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result->releaseBuffer(len16); 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator= 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fInput; 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// inputText() 142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 142250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::inputText() const { 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fInputText; 142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getInput() -- like inputText(), but makes a clone or copies into another UText 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 143227f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const { 143327f654740f2a26ad62a5c155af9199af9e69b889claireho UBool bailOut = FALSE; 143427f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 143527f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 143627f654740f2a26ad62a5c155af9199af9e69b889claireho } 143727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(fDeferredStatus)) { 143827f654740f2a26ad62a5c155af9199af9e69b889claireho status = fDeferredStatus; 143927f654740f2a26ad62a5c155af9199af9e69b889claireho bailOut = TRUE; 144027f654740f2a26ad62a5c155af9199af9e69b889claireho } 144127f654740f2a26ad62a5c155af9199af9e69b889claireho 144227f654740f2a26ad62a5c155af9199af9e69b889claireho if (bailOut) { 144327f654740f2a26ad62a5c155af9199af9e69b889claireho if (dest) { 144427f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status); 144527f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 144627f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 144727f654740f2a26ad62a5c155af9199af9e69b889claireho return utext_clone(NULL, fInputText, FALSE, TRUE, &status); 144827f654740f2a26ad62a5c155af9199af9e69b889claireho } 144927f654740f2a26ad62a5c155af9199af9e69b889claireho } 145027f654740f2a26ad62a5c155af9199af9e69b889claireho 145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest) { 145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status); 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t input16Len; 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_USES_U16(fInputText)) { 145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho input16Len = (int32_t)fInputLength; 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len)); 146327f654740f2a26ad62a5c155af9199af9e69b889claireho if (inputChars == NULL) { 146427f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 146527f654740f2a26ad62a5c155af9199af9e69b889claireho } 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ZERO_ERROR; 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status); 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(inputChars); 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return utext_clone(NULL, fInputText, FALSE, TRUE, &status); 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut); 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut) { 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool retVal = FALSE; 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In the following test, we're really only interested in whether the UText should switch 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // between heap and stack allocation. If length hasn't changed, we won't, so the chunkContents 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will still point to the correct data. 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (utext_nativeLength(ut) != ut->nativeIndexingLimit) { 148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString *us=(UnicodeString *)ut->context; 149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Update to the latest length. 149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit). 149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newLength = us->length(); 149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Update the chunk description. 149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The buffer may have switched between stack- and heap-based. 
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkContents = us->getBuffer(); 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkLength = newLength; 149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->chunkNativeLimit = newLength; 150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ut->nativeIndexingLimit = newLength; 150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho retVal = TRUE; 150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return retVal; 150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// lookingAt() 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 151250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::lookingAt(UErrorCode &status) { 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = fDeferredStatus; 151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)fActiveStart, FALSE, status); 153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(fActiveStart, FALSE, status); 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153927f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) { 154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeStart; 156227f654740f2a26ad62a5c155af9199af9e69b889claireho nativeStart = start; 156327f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)nativeStart, FALSE, status); 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(nativeStart, FALSE, status); 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// matches() 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 158350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::matches(UErrorCode &status) { 158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (compat_SyncMutableUTextContents(fInputText)) { 159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else { 159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)fActiveStart, TRUE, status); 160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(fActiveStart, TRUE, status); 160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::matches(int64_t start, UErrorCode &status) { 161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (start < 0) { 162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputUniStrMaybeMutable) { 162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(compat_SyncMutableUTextContents(fInputText)) { 162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nativeStart; 163427f654740f2a26ad62a5c155af9199af9e69b889claireho nativeStart = start; 163527f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart < fActiveStart || nativeStart > fActiveLimit) { 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INDEX_OUTOFBOUNDS_ERROR; 163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) { 164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchChunkAt((int32_t)nativeStart, TRUE, status); 164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho MatchAt(nativeStart, TRUE, status); 164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fMatch; 164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// pattern 
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 165550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst RegexPattern &RegexMatcher::pattern() const { 165650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *fPattern; 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 166150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 166350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// region 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 166550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 166627f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) { 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 167127f654740f2a26ad62a5c155af9199af9e69b889claireho if (regionStart>regionLimit || regionStart<0 || regionLimit<0) { 167250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 167450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 167527f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeStart = regionStart; 
167627f654740f2a26ad62a5c155af9199af9e69b889claireho int64_t nativeLimit = regionLimit; 167727f654740f2a26ad62a5c155af9199af9e69b889claireho if (nativeStart > fInputLength || nativeLimit > fInputLength) { 167827f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ILLEGAL_ARGUMENT_ERROR; 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 168050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 168127f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex == -1) 168227f654740f2a26ad62a5c155af9199af9e69b889claireho this->reset(); 168327f654740f2a26ad62a5c155af9199af9e69b889claireho else 168427f654740f2a26ad62a5c155af9199af9e69b889claireho resetPreserveRegion(); 168527f654740f2a26ad62a5c155af9199af9e69b889claireho 168650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionStart = nativeStart; 168750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionLimit = nativeLimit; 168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = nativeStart; 168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = nativeLimit; 169027f654740f2a26ad62a5c155af9199af9e69b889claireho 169127f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex != -1) { 169227f654740f2a26ad62a5c155af9199af9e69b889claireho if (startIndex < fActiveStart || startIndex > fActiveLimit) { 169327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 169427f654740f2a26ad62a5c155af9199af9e69b889claireho } 169527f654740f2a26ad62a5c155af9199af9e69b889claireho fMatchEnd = startIndex; 169627f654740f2a26ad62a5c155af9199af9e69b889claireho } 169727f654740f2a26ad62a5c155af9199af9e69b889claireho 169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!fTransparentBounds) { 169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = nativeStart; 170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = nativeLimit; 170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(fAnchoringBounds) { 170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = nativeStart; 170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = nativeLimit; 170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 170927f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) { 171027f654740f2a26ad62a5c155af9199af9e69b889claireho return region(start, limit, -1, status); 171127f654740f2a26ad62a5c155af9199af9e69b889claireho} 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 171550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regionEnd 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 171850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionEnd() const { 171927f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)fRegionLimit; 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 172227f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionEnd64() const { 172327f654740f2a26ad62a5c155af9199af9e69b889claireho return fRegionLimit; 172427f654740f2a26ad62a5c155af9199af9e69b889claireho} 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru//-------------------------------------------------------------------------------- 1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 172850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// regionStart 1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 173150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionStart() const { 173227f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)fRegionStart; 173327f654740f2a26ad62a5c155af9199af9e69b889claireho} 173427f654740f2a26ad62a5c155af9199af9e69b889claireho 173527f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionStart64() const { 173627f654740f2a26ad62a5c155af9199af9e69b889claireho return fRegionStart; 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 174250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceAll 1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 174550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &status) { 174650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 174750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 174850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 
174927f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 175027f654740f2a26ad62a5c155af9199af9e69b889claireho return resultString; 175127f654740f2a26ad62a5c155af9199af9e69b889claireho } 175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &resultString, &status); 175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho replaceAll(&replacementText, &resultText, status); 1757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 175950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 176050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return resultString; 1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 176450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 176650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceAll, UText mode 1767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 176850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &status) { 1769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 177027f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus; 
177427f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 177650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 177750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest == NULL) { 177850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString emptyString; 177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 178050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&empty, &emptyString, &status); 178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 1784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 178650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_SUCCESS(status)) { 178750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 178850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (find()) { 178950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(dest, replacement, status); 179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 179150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 179250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 179350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 179427f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(dest, status); 179550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 179650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 179750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste 
Queru//-------------------------------------------------------------------------------- 1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceFirst 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 180650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) { 180750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText replacementText = UTEXT_INITIALIZER; 180850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText resultText = UTEXT_INITIALIZER; 180950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString resultString; 181050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 181150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&replacementText, &replacement, &status); 181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&resultText, &resultString, &status); 181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho replaceFirst(&replacementText, &resultText, status); 181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&resultText); 181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&replacementText); 181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return resultString; 1820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// replaceFirst, UText mode 1824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 
182550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) { 1826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 182727f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(fDeferredStatus)) { 1830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = fDeferredStatus; 183127f654740f2a26ad62a5c155af9199af9e69b889claireho return dest; 1832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru reset(); 183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!find()) { 183627f654740f2a26ad62a5c155af9199af9e69b889claireho return getInput(dest, status); 183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 1838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest == NULL) { 184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString emptyString; 184150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText empty = UTEXT_INITIALIZER; 184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUnicodeString(&empty, &emptyString, &status); 184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest = utext_clone(NULL, &empty, TRUE, FALSE, &status); 184550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&empty); 1846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 184750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 184850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho appendReplacement(dest, replacement, status); 184927f654740f2a26ad62a5c155af9199af9e69b889claireho appendTail(dest, status); 
185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// requireEnd 1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 186050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::requireEnd() const { 186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fRequireEnd; 1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 1866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// reset 1868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 1869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 187050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset() { 187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionStart = 0; 187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRegionLimit = fInputLength; 187350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = 0; 
187450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fInputLength; 187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = 0; 187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = fInputLength; 187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = 0; 187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = fInputLength; 187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho resetPreserveRegion(); 188050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 188550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::resetPreserveRegion() { 188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchStart = 0; 188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchEnd = 0; 188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = -1; 188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAppendPosition = 0; 189050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = FALSE; 189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = FALSE; 189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTime = 0; 189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter = TIMER_INITIAL_VALUE; 189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho //resetStack(); // more expensive than it looks... 
189650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 189750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 189950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(const UnicodeString &input) { 190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus); 190127f654740f2a26ad62a5c155af9199af9e69b889claireho if (fPattern->fNeedsAltInput) { 190227f654740f2a26ad62a5c155af9199af9e69b889claireho fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 190327f654740f2a26ad62a5c155af9199af9e69b889claireho } 190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 190550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 190950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Do the following for any UnicodeString. 191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This is for compatibility for those clients who modify the input string "live" during regex operations. 
191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputUniStrMaybeMutable = TRUE; 191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr != NULL) { 191550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(fInputText, status); 191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 192050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 192150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 192450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(UText *input) { 192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fInputText != input) { 192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus); 192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus); 192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputLength = utext_nativeLength(fInputText); 192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fInput; 193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInput = NULL; 193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr != NULL) { 193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(input, 
status); 193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fInputUniStrMaybeMutable = FALSE; 194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*RegexMatcher &RegexMatcher::reset(const UChar *) { 194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fDeferredStatus = U_INTERNAL_PROGRAM_ERROR; 194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}*/ 195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 195127f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) { 195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); // Reset also resets the region to be the entire string. 
195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 195727f654740f2a26ad62a5c155af9199af9e69b889claireho if (position < 0 || position > fActiveLimit) { 195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 196127f654740f2a26ad62a5c155af9199af9e69b889claireho fMatchEnd = position; 196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 196527f654740f2a26ad62a5c155af9199af9e69b889claireho 19662e615e9896b12236afe0ff2695e8afc2ee73f961claireho//-------------------------------------------------------------------------------- 19672e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 19682e615e9896b12236afe0ff2695e8afc2ee73f961claireho// refresh 19692e615e9896b12236afe0ff2695e8afc2ee73f961claireho// 19702e615e9896b12236afe0ff2695e8afc2ee73f961claireho//-------------------------------------------------------------------------------- 19712e615e9896b12236afe0ff2695e8afc2ee73f961clairehoRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) { 19722e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19732e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19742e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19752e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (input == NULL) { 19762e615e9896b12236afe0ff2695e8afc2ee73f961claireho status = U_ILLEGAL_ARGUMENT_ERROR; 19772e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19782e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19792e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (utext_nativeLength(fInputText) != utext_nativeLength(input)) { 19802e615e9896b12236afe0ff2695e8afc2ee73f961claireho status = U_ILLEGAL_ARGUMENT_ERROR; 
19812e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19822e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19832e615e9896b12236afe0ff2695e8afc2ee73f961claireho int64_t pos = utext_getNativeIndex(fInputText); 19842e615e9896b12236afe0ff2695e8afc2ee73f961claireho // Shallow read-only clone of the new UText into the existing input UText 19852e615e9896b12236afe0ff2695e8afc2ee73f961claireho fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status); 19862e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19872e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19882e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19892e615e9896b12236afe0ff2695e8afc2ee73f961claireho utext_setNativeIndex(fInputText, pos); 1990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 19912e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (fAltInputText != NULL) { 19922e615e9896b12236afe0ff2695e8afc2ee73f961claireho pos = utext_getNativeIndex(fAltInputText); 19932e615e9896b12236afe0ff2695e8afc2ee73f961claireho fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status); 19942e615e9896b12236afe0ff2695e8afc2ee73f961claireho if (U_FAILURE(status)) { 19952e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 19962e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19972e615e9896b12236afe0ff2695e8afc2ee73f961claireho utext_setNativeIndex(fAltInputText, pos); 19982e615e9896b12236afe0ff2695e8afc2ee73f961claireho } 19992e615e9896b12236afe0ff2695e8afc2ee73f961claireho return *this; 20002e615e9896b12236afe0ff2695e8afc2ee73f961claireho} 2001b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 2005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 
200650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setTrace 2007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 2008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//-------------------------------------------------------------------------------- 200950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTrace(UBool state) { 201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTraceDebug = state; 2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 2013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------- 202050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::split(const UnicodeString &input, 202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString dest[], 202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 202550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText inputText = UTEXT_INITIALIZER; 202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openConstUnicodeString(&inputText, &input, &status); 202727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 202827f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 202927f654740f2a26ad62a5c155af9199af9e69b889claireho } 203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
203127f654740f2a26ad62a5c155af9199af9e69b889claireho UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity); 203227f654740f2a26ad62a5c155af9199af9e69b889claireho if (destText == NULL) { 203327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 203427f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 203527f654740f2a26ad62a5c155af9199af9e69b889claireho } 203650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 203750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i < destCapacity; i++) { 203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destText[i] = utext_openUnicodeString(NULL, &dest[i], &status); 203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 204050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 204150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t fieldCount = split(&inputText, destText, destCapacity, status); 204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i < destCapacity; i++) { 204450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(destText[i]); 204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 204727f654740f2a26ad62a5c155af9199af9e69b889claireho uprv_free(destText); 204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&inputText); 204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fieldCount; 205050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// split, UText mode 205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 205550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::split(UText *input, 205650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText *dest[], 
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) 205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{ 206050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 206150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check arguements for validity 206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 206450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (destCapacity < 1) { 206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 207050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 207150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset for the input text 207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(input); 207650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t nextOutputStringStart = 0; 207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fActiveLimit == 0) { 207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through the input text, searching for the delimiter pattern 208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numCaptureGroups = fPattern->fGroupMap->size(); 
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; ; i++) { 208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (i>=destCapacity-1) { 208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is one or zero output string left. 208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fill the last output string with whatever is left from the input, then exit the loop. 209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ( i will be == destCapacity if we filled the output array while processing 209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // capture groups of the delimiter expression, in which case we will discard the 209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // last capture group saved in favor of the unprocessed remainder of the 209350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // input string.) 209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i = destCapacity-1; 209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fActiveLimit > nextOutputStringStart) { 209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 209827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 209927f654740f2a26ad62a5c155af9199af9e69b889claireho input->chunkContents+nextOutputStringStart, 210027f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fActiveLimit-nextOutputStringStart), &status); 210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 210327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 210427f654740f2a26ad62a5c155af9199af9e69b889claireho fActiveLimit-nextOutputStringStart, &status); 210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, 
FALSE, &status); 210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 210850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 210950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 211027f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t remaining16Length = 211127f654740f2a26ad62a5c155af9199af9e69b889claireho utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 211327f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 211427f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 211527f654740f2a26ad62a5c155af9199af9e69b889claireho break; 211627f654740f2a26ad62a5c155af9199af9e69b889claireho } 211727f654740f2a26ad62a5c155af9199af9e69b889claireho 211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 211950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 212350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (find()) { 213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We found another delimiter. Move everything from where we started looking 213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // up until the start of the delimiter into the next output string. 213650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 213827f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 213927f654740f2a26ad62a5c155af9199af9e69b889claireho input->chunkContents+nextOutputStringStart, 214027f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fMatchStart-nextOutputStringStart), &status); 214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 214327f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 214427f654740f2a26ad62a5c155af9199af9e69b889claireho fMatchStart-nextOutputStringStart, &status); 214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 214850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 214950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t remaining16Length = 
utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus); 215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 215227f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 215327f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 215427f654740f2a26ad62a5c155af9199af9e69b889claireho break; 215527f654740f2a26ad62a5c155af9199af9e69b889claireho } 215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, nextOutputStringStart, fMatchStart, remainingChars, remaining16Length+1, &status); 215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 216150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 216250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 216450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 216550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextOutputStringStart = fMatchEnd; 216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If the delimiter pattern has capturing parentheses, the captured 217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // text goes out into the next n destination strings. 
217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupNum; 217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) { 2174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i >= destCapacity-2) { 2175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Never fill the last available output string with capture group text. 2176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // It will filled with the last field, the remainder of the 2177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // unsplit input text. 217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i++; 218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = group(groupNum, dest[i], status); 218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 218350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 218450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (nextOutputStringStart == fActiveLimit) { 2185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The delimiter was at the end of the string. We're done, but first 2186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // we output one last empty string, for the empty field following 2187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the delimiter at the end of input. 
2188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (i+1 < destCapacity) { 2189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 2190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (dest[i] == NULL) { 2191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho dest[i] = utext_openUChars(NULL, NULL, 0, &status); 2192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 2193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UChar emptyString[] = {(UChar)0}; 2194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status); 2195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 2197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 2198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 2199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else 220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We ran off the end of the input while looking for the next delimiter. 220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // All the remaining text goes into the current output string. 
220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) { 220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 220727f654740f2a26ad62a5c155af9199af9e69b889claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), 220827f654740f2a26ad62a5c155af9199af9e69b889claireho input->chunkContents+nextOutputStringStart, 220927f654740f2a26ad62a5c155af9199af9e69b889claireho (int32_t)(fActiveLimit-nextOutputStringStart), &status); 221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 221227f654740f2a26ad62a5c155af9199af9e69b889claireho utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart, 221327f654740f2a26ad62a5c155af9199af9e69b889claireho fActiveLimit-nextOutputStringStart, &status); 221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 221850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode lengthStatus = U_ZERO_ERROR; 221950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus); 222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1)); 222127f654740f2a26ad62a5c155af9199af9e69b889claireho if (remainingChars == NULL) { 222227f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_MEMORY_ALLOCATION_ERROR; 222327f654740f2a26ad62a5c155af9199af9e69b889claireho break; 222427f654740f2a26ad62a5c155af9199af9e69b889claireho } 222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_extract(input, 
nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status); 222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (dest[i]) { 222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status); 222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UText remainingText = UTEXT_INITIALIZER; 223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_openUChars(&remainingText, remainingChars, remaining16Length, &status); 223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status); 223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utext_close(&remainingText); 223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uprv_free(remainingChars); 223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 224027f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 224127f654740f2a26ad62a5c155af9199af9e69b889claireho break; 224227f654740f2a26ad62a5c155af9199af9e69b889claireho } 224327f654740f2a26ad62a5c155af9199af9e69b889claireho } // end of for loop 224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return i+1; 224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 225050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// start 225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 
225250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 225350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::start(UErrorCode &status) const { 225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return start(0, status); 225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 225727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(UErrorCode &status) const { 225827f654740f2a26ad62a5c155af9199af9e69b889claireho return start64(0, status); 225927f654740f2a26ad62a5c155af9199af9e69b889claireho} 226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// start(int32_t group, UErrorCode &status) 226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 226627f654740f2a26ad62a5c155af9199af9e69b889claireho 226727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const { 226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 226950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fMatch == FALSE) { 227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = 
U_REGEX_INVALID_STATE; 227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (group < 0 || group > fPattern->fGroupMap->size()) { 228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_INDEX_OUTOFBOUNDS_ERROR; 228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return -1; 228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t s; 228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (group == 0) { 228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fMatchStart; 228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1); 228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset < fPattern->fFrameSize); 228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupOffset >= 0); 229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho s = fFrame->fExtra[groupOffset]; 229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229327f654740f2a26ad62a5c155af9199af9e69b889claireho return s; 229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 229727f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const { 229827f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)start64(group, status); 229927f654740f2a26ad62a5c155af9199af9e69b889claireho} 230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 230250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 
230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// useAnchoringBounds 230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 230650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) { 230750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchoringBounds = b; 230850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorStart = (fAnchoringBounds ? fRegionStart : 0); 230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fAnchorLimit = (fAnchoringBounds ? fRegionLimit : fInputLength); 231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// useTransparentBounds 231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 231950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) { 232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTransparentBounds = b; 232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookStart = (fTransparentBounds ? 0 : fRegionStart); 232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLookLimit = (fTransparentBounds ? 
fInputLength : fRegionLimit); 232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return *this; 232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setTimeLimit 232950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 233150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) { 233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit < 0) { 234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 234150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTimeLimit = limit; 234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 
234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getTimeLimit 235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 235250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getTimeLimit() const { 235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fTimeLimit; 235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 235950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setStackLimit 236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 236250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) { 236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = fDeferredStatus; 236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit < 0) { 237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_ILLEGAL_ARGUMENT_ERROR; 237250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset the matcher. This is needed here in case there is a current match 237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // whose final stack frame (containing the match results, pointed to by fFrame) 237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // would be lost by resizing to a smaller stack size. 237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reset(); 237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (limit == 0) { 238150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Unlimited stack expansion 238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setMaxCapacity(0); 238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Change the units of the limit from bytes to ints, and bump the size up 238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to be big enough to hold at least one stack frame for the pattern, 238650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // if it isn't there already. 
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t adjustedLimit = limit / sizeof(int32_t); 238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (adjustedLimit < fPattern->fFrameSize) { 238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho adjustedLimit = fPattern->fFrameSize; 239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setMaxCapacity(adjustedLimit); 239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStackLimit = limit; 239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 239550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getStackLimit 240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 240150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 240250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getStackLimit() const { 240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fStackLimit; 240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// setMatchCallback 241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
241250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setMatchCallback(URegexMatchCallback *callback, 241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const void *context, 241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCallbackFn = callback; 241950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fCallbackContext = context; 242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 242450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 242550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// getMatchCallback 242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 242850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::getMatchCallback(URegexMatchCallback *&callback, 242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const void *&context, 243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 243150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho callback = fCallbackFn; 243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho context = fCallbackContext; 243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 243927f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 244027f654740f2a26ad62a5c155af9199af9e69b889claireho// 244127f654740f2a26ad62a5c155af9199af9e69b889claireho// setMatchCallback 244227f654740f2a26ad62a5c155af9199af9e69b889claireho// 244327f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 244427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback *callback, 244527f654740f2a26ad62a5c155af9199af9e69b889claireho const void *context, 244627f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status) { 244727f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 244827f654740f2a26ad62a5c155af9199af9e69b889claireho return; 244927f654740f2a26ad62a5c155af9199af9e69b889claireho } 245027f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackFn = callback; 245127f654740f2a26ad62a5c155af9199af9e69b889claireho fFindProgressCallbackContext = context; 245227f654740f2a26ad62a5c155af9199af9e69b889claireho} 245327f654740f2a26ad62a5c155af9199af9e69b889claireho 245427f654740f2a26ad62a5c155af9199af9e69b889claireho 245527f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 245627f654740f2a26ad62a5c155af9199af9e69b889claireho// 245727f654740f2a26ad62a5c155af9199af9e69b889claireho// getMatchCallback 245827f654740f2a26ad62a5c155af9199af9e69b889claireho// 245927f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 246027f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback *&callback, 246127f654740f2a26ad62a5c155af9199af9e69b889claireho const void *&context, 
246227f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode &status) { 246327f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(status)) { 246427f654740f2a26ad62a5c155af9199af9e69b889claireho return; 246527f654740f2a26ad62a5c155af9199af9e69b889claireho } 246627f654740f2a26ad62a5c155af9199af9e69b889claireho callback = fFindProgressCallbackFn; 246727f654740f2a26ad62a5c155af9199af9e69b889claireho context = fFindProgressCallbackContext; 246827f654740f2a26ad62a5c155af9199af9e69b889claireho} 246927f654740f2a26ad62a5c155af9199af9e69b889claireho 247027f654740f2a26ad62a5c155af9199af9e69b889claireho 247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================ 247250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Code following this point in this file is the internal 247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Match Engine Implementation. 247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================ 247750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// resetStack 248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Discard any previous contents of the state save stack, and initialize a 248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// new stack frame to all -1. The -1s are needed for capture group limits, 248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// where they indicate that a group has not yet matched anything. 
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 248650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoREStackFrame *RegexMatcher::resetStack() { 248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Discard any previous contents of the state save stack, and initialize a 248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // new stack frame with all -1 data. The -1s are needed for capture group limits, 248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // where they indicate that a group has not yet matched anything. 249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->removeAllElements(); 249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus); 249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) { 249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iFrame->fExtra[i] = -1; 249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return iFrame; 249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 250350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// isWordBoundary 250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in perl, "xab..cd..", \b is true at positions 0,3,5,7 250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// For us, 250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// If the current char 
is a combining mark, 250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// \b is FALSE. 250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Else Scan backwards to the first non-combining char. 251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// We are at a boundary if the this char and the original chars are 251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// opposite in membership in \w set 251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// parameters: pos - the current position in the input buffer 251450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// TODO: double-check edge cases at region boundaries. 251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 251850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isWordBoundary(int64_t pos) { 251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isBoundary = FALSE; 252050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool cIsWord = FALSE; 252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Determine whether char c at current position is a member of the word set of chars. 252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we're off the end of the string, behave as though we're not at a word char. 
252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, pos); 252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_CURRENT32(fInputText); 252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Current char is a combining one. Not a boundary. 253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 253350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up until we come to a non-combining char, determine whether 253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that char is a word char. 253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool prevCIsWord = FALSE; 253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) { 254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevChar = UTEXT_PREVIOUS32(fInputText); 254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho || u_charType(prevChar) == U_FORMAT_CHAR)) { 254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 254950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isBoundary = cIsWord ^ prevCIsWord; 255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isBoundary; 255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 255450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isChunkWordBoundary(int32_t pos) { 255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isBoundary = FALSE; 255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool cIsWord = FALSE; 255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Determine whether char c at current position is a member of the word set of chars. 256450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we're off the end of the string, behave as though we're not at a word char. 256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_GET(inputBuf, fLookStart, pos, fLookLimit, c); 256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) { 256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Current char is a combining one. Not a boundary. 
256950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c); 257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up until we come to a non-combining char, determine whether 257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that char is a word char. 257650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool prevCIsWord = FALSE; 257750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos <= fLookStart) { 257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevChar; 258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, fLookStart, pos, prevChar); 258350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND) 258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho || u_charType(prevChar) == U_FORMAT_CHAR)) { 258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar); 258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 258950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isBoundary = cIsWord ^ prevCIsWord; 259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return isBoundary; 259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// isUWordBoundary 259650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Test for a word boundary using RBBI word break. 259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// parameters: pos - the current position in the input buffer 260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 260250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isUWordBoundary(int64_t pos) { 260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool returnVal = FALSE; 260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0 260550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we haven't yet created a break iterator for this matcher, do it now. 
260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fWordBreakItr == NULL) { 260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr = 260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus); 261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(fDeferredStatus)) { 261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fWordBreakItr->setText(fInputText, fDeferredStatus); 261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (pos >= fLookLimit) { 261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 261850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = TRUE; // With Unicode word rules, only positions within the interior of "real" 261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // words are not boundaries. All non-word chars stand by themselves, 262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // with word boundaries on both sides. 262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!UTEXT_USES_U16(fInputText)) { 262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // !!!: Would like a better way to do this! 
262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode status = U_ZERO_ERROR; 262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pos = utext_extract(fInputText, 0, pos, NULL, 0, &status); 262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho returnVal = fWordBreakItr->isBoundary((int32_t)pos); 262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return returnVal; 263150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// IncrementTime This function is called once each TIMER_INITIAL_VALUE state 263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// saves. Increment the "time" counter, and call the 263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// user callback function if there is one installed. 263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 263950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// If the match operation needs to be aborted, either for a time-out 264050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// or because the user callback asked for it, just set an error status. 264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The engine will pick that up and stop in its outer loop. 
264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 264450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::IncrementTime(UErrorCode &status) { 264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter = TIMER_INITIAL_VALUE; 264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTime++; 264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fCallbackFn != NULL) { 264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) { 264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_STOPPED_BY_CALLER; 265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 265250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTimeLimit > 0 && fTime >= fTimeLimit) { 265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_TIME_OUT; 265550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 266027f654740f2a26ad62a5c155af9199af9e69b889claireho// ReportFindProgress This function is called once for each advance in the target 266127f654740f2a26ad62a5c155af9199af9e69b889claireho// string from the find() function, and calls the user progress callback 266227f654740f2a26ad62a5c155af9199af9e69b889claireho// function if there is one installed. 
266327f654740f2a26ad62a5c155af9199af9e69b889claireho// 266427f654740f2a26ad62a5c155af9199af9e69b889claireho// NOTE: 266527f654740f2a26ad62a5c155af9199af9e69b889claireho// 266627f654740f2a26ad62a5c155af9199af9e69b889claireho// If the match operation needs to be aborted because the user 266727f654740f2a26ad62a5c155af9199af9e69b889claireho// callback asked for it, just set an error status. 266827f654740f2a26ad62a5c155af9199af9e69b889claireho// The engine will pick that up and stop in its outer loop. 266927f654740f2a26ad62a5c155af9199af9e69b889claireho// 267027f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 267127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) { 267227f654740f2a26ad62a5c155af9199af9e69b889claireho if (fFindProgressCallbackFn != NULL) { 267327f654740f2a26ad62a5c155af9199af9e69b889claireho if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) { 267427f654740f2a26ad62a5c155af9199af9e69b889claireho status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/; 267527f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 267627f654740f2a26ad62a5c155af9199af9e69b889claireho } 267727f654740f2a26ad62a5c155af9199af9e69b889claireho } 267827f654740f2a26ad62a5c155af9199af9e69b889claireho return TRUE; 267927f654740f2a26ad62a5c155af9199af9e69b889claireho} 268027f654740f2a26ad62a5c155af9199af9e69b889claireho 268127f654740f2a26ad62a5c155af9199af9e69b889claireho//-------------------------------------------------------------------------------- 268227f654740f2a26ad62a5c155af9199af9e69b889claireho// 268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// StateSave 268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Make a new stack frame, initialized as a copy of the current stack frame. 
268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Set the pattern index in the original stack frame from the operand value 268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// in the opcode. Execution of the engine continues with the state in 268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// the newly created stack frame 268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 268950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Note that reserveBlock() may grow the stack, resulting in the 269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// whole thing being relocated in memory. 269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Parameters: 269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fp The top frame pointer when called. At return, a new 269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// fame will be present 269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// savePatIdx An index into the compiled pattern. Goes into the original 269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// (not new) frame. If execution ever back-tracks out of the 269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// new frame, this will be where we continue from in the pattern. 269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Return 269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// The new frame pointer. 270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 270250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) { 270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // push storage for a new frame. 
270450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->reserveBlock(fFrameSize, status); 270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == NULL) { 270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Failure on attempted stack expansion. 270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Stack function set some other error code, change it to a more 270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // specific one for regular expressions. 270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho status = U_REGEX_STACK_OVERFLOW; 271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We need to return a writable stack frame, so just return the 271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // previous frame. The match operation will stop quickly 271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // because of the error status, after which the frame will never 271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // be looked at again. 271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return fp; 271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)(newFP - fFrameSize); // in case of realloc of stack. 271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // New stack frame = copy of old top frame. 
271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *source = (int64_t *)fp; 272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *dest = newFP; 272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *dest++ = *source++; 272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (source == newFP) { 272450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 272550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 272650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 272750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 272850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fTickCounter--; 272950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTickCounter <= 0) { 273050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IncrementTime(status); // Re-initializes fTickCounter 273150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 273250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = savePatIdx; 273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return (REStackFrame *)newFP; 273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// MatchAt This is the actual matching engine. 274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 274150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// startIdx: begin matching a this index. 
274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// toEnd: if true, match must extend to end of the input region 274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// 274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//-------------------------------------------------------------------------------- 274550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { 274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool isMatch = FALSE; // True if the we have a match. 274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards 274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t op; // Operation from the compiled pattern, split into 275150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t opType; // the opcode 275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t opValue; // and the operand value. 
275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_RUN_DEBUG 275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) 275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("MatchAt(startIdx=%ld)\n", startIdx); 275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("Original Pattern: "); 275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = utext_next32From(fPattern->fPattern, 0); 276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 276350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fPattern->fPattern); 276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 276950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("Input String: "); 277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = utext_next32From(fInputText, 0); 277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("%c", c); 277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("\n"); 
278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Cache frequently referenced items from the compiled pattern 278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pat = fPattern->fCompiledPat->getBuffer(); 279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *litText = fPattern->fLiteralText.getBuffer(); 279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UVector *sets = fPattern->fSets; 279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fFrameSize = fPattern->fFrameSize; 279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *fp = resetStack(); 279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = 0; 279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = startIdx; 280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Zero out the pattern's static data 280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i = 0; i<fPattern->fDataSize; i++) { 280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[i] = 0; 280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Main loop for interpreting the 
compiled pattern. 280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // One iteration of the loop per pattern operation performed. 281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0 281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (_heapchk() != _HEAPOK) { 281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "Heap Trouble\n"); 281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue = URX_VAL(op); 282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_RUN_DEBUG 282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) { 282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("inputIdx=%d inputChar=%x sp=%3d activeLimit=%d ", fp->fInputIdx, 282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fPattern->dumpOp(fp->fPatIdx); 282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho switch (opType) { 283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_NOP: 
283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKTRACK: 283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Force a backtrack. In some circumstances, the pattern compiler 284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will notice that the pattern can't possibly match anything, and will 284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // emit one of these at that point. 284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_ONECHAR: 284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fActiveLimit) { 284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == opValue) { 285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 285550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 285650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 285950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame 
*prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 286250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 286350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_PREVIOUS32(fInputText); 286450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex) { 286550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == opValue) { 286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 286850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c == U_SENTINEL) { 286950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 287050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 287550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 288350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STRING: 289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test input against a literal string. 289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Strings require two slots in the compiled pattern, one for the 289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // offset to the string text, and one for the length. 289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stringStartIdx = opValue; 289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stringLen; 289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho stringLen = URX_VAL(op); 290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opType == URX_STRING_LEN); 290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stringLen >= 2); 290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternChars = litText+stringStartIdx; 290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternEnd = patternChars+stringLen; 290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 
291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = TRUE; 291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (patternChars < patternEnd && success) { 291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != U_SENTINEL && UTEXT_GETNATIVEINDEX(fInputText) <= fActiveLimit) { 291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == c); 291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 1; 292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (patternChars+1 < patternEnd) { 292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c)); 292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 2; 292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = FALSE; 292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; // TODO: See ticket 6074 292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size()) { 
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 293750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset to last start point 293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = litText+stringStartIdx; 294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Search backwards for a possible start 294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 294350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 294650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if ((U_IS_BMP(c) && *patternChars == c) || 294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c))) { 294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex); 295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // And try again 295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 
295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STATE_SAVE: 297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, opValue, status); 297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_END: 297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The match loop will exit via this path on a successful match, 297950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // when we reach the end of the pattern. 298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (toEnd && fp->fInputIdx != fActiveLimit) { 298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The pattern matched, but not to the end of input. Try some more. 
298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = TRUE; 298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto breakFromLoop; 298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start and End Capture stack frame variables are laid out out like this: 298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // fp->fExtra[opValue] - The start of a completed capture group 299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // opValue+1 - The end of a completed capture group 299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // opValue+2 - the start of a capture group whose end 299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // has not yet been reached (and might not ever be). 299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_START_CAPTURE: 299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue+2] = fp->fInputIdx; 299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_END_CAPTURE: 300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue+1] = fp->fInputIdx; // End position 300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR: // $, test for End of line 300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or for position before new line at end of input 301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line that is located at the 302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // end of input, succeed. 
302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) { 302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If not in the middle of a CR/LF sequence 3026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) { 302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At new-line at end of input. Success 302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 nextC = UTEXT_NEXT32(fInputText); 303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) { 303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // At CR/LF at end of input. 
Success 304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 304350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Off the end of input. Success. 305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Either at the last character of input, or off the end. 305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) { 305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 306250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at end of input. Back-track out. 
306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_M: // $, test for End of line in multi-line mode 307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line, succeed. 307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 
308050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_CURRENT32(fInputText); 308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { 308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In multi-line mode, hitting a new-line just before the end of input does not 308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // set the hitEnd or requireEnd flags 308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) { 308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // not at a new line. Fail. 309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 
310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; // Java set requireEnd in this case, even though 310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // adding a new-line would not lose the match. 310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are not positioned just before a new-line, the test fails; backtrack out. 310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (UTEXT_CURRENT32(fInputText) != 0x0a) { 310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET: // ^, test for start of line 311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fAnchorStart) { 311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M: // ^, test for start of line in mulit-line mode 312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx == fAnchorStart) { 
312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 312650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unless we are at the end of input 312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_PREVIOUS32(fInputText); 313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((fp->fInputIdx < fAnchorLimit) && 313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It's a new-line. ^ is true. Success. 313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: what should be done with positions between a CR and LF? 313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Fail. 
313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 313950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 314550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx >= fAnchorStart); 314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx <= fAnchorStart) { 314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx <= fAnchorLimit); 315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_PREVIOUS32(fInputText); 315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != 0x0a) { 315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Back-track out. 
315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_B: // Test for word boundaries 316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isWordBoundary(fp->fInputIdx); 316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success ^= (opValue != 0); // flip sense for \B 316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 316750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 317350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isUWordBoundary(fp->fInputIdx); 317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success ^= (opValue != 0); // flip sense for \B 317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_D: // Test for decimal digit 318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success ^= (opValue != 0); // flip sense for \D 319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_G: // Test for position at end of previous match 320727f654740f2a26ad62a5c155af9199af9e69b889claireho if (!((fMatch && 
fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_X: 321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match a Grapheme, as defined by Unicode TR 29. 321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Differs slightly from Perl, which consumes combining marks independently 321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of context. 321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fail if at end of input 322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 322750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Examine (and consume) the current char. 322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dispatch into a little state machine, based on the char. 
323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet **sets = fPattern->fStaticSets; 323450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 324550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_L: 324650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 325050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 325250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if 
(sets[URX_GC_V]->contains(c)) goto GC_V; 3253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 325450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 325550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 325650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 325750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_V: 325850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 325950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 326050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 326150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 326250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 3263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 326550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 326650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 326750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_T: 326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 327050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 3272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 327450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
327650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Extend: 327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Combining characters are consumed here 327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_CURRENT32(fInputText); 328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 3286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Done; 329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Control: 329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Most control chars stand alone (don't combine with combining chars), 329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // except for that CR/LF sequence is a single grapheme cluster. 
329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { 329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 329950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Done: 330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKSLASH_Z: // Test for end of Input 331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fAnchorLimit) { 331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STATIC_SETREF: 332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 
Test input character against one of the predefined sets 332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (Word Characters, for example) 332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The high bit of the op value is a flag for the match polarity. 332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 0: success if input char is in set. 332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 1: success if input char is not in set. 332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue &= ~URX_NEG_SET; 333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 333650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 
334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_PREVIOUS32(fInputText); // skip the first character we tried 336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = ((opValue & URX_NEG_SET) == URX_NEG_SET); // reset 336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c < 256) { 336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 
336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex && !success); 337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success && c != U_SENTINEL) { 337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STAT_SETREF_N: 339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test input character for NOT being a member of one of 339950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the predefined sets (Word Characters, for example) 340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) == FALSE) { 342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = 
UTEXT_GETNATIVEINDEX(fInputText); 342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_PREVIOUS32(fInputText); // skip the first character we tried 343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c < 256) { 343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 
344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) == FALSE) { 344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex); 345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_SETREF: 346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 
347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Pick up one char and test it for set membership. 347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 
349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 349650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_PREVIOUS32(fInputText); // skip the first character we tried 350150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 350250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 350350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 350450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 350550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 350650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c < 256) { 350750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 350850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 350950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 351050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 351150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 351250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 351350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 351850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
351950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex); 352050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 352250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 352350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 352450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 352550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 352750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 353550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 353650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY: 353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // . matches anything, but stops at end-of-line. 354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 
354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, unless we've hit end-of-line 355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of line in normal mode. . does not match. 
355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY_ALL: 356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // ., in dot-matches-all (including new lines) mode 356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, except if we are 357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at a cr/lf, advance over both of them. 
357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c==0x0d && fp->fInputIdx < fActiveLimit) { 358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In the case of a CR/LF, we need to advance over both. 358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 nextc = UTEXT_CURRENT32(fInputText); 358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (nextc == 0x0a) { 3584b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_NEXT32(fInputText); 358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOTANY_UNIX: 359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // '.' operator, matches all, but stops at end-of-line. 359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // UNIX_LINES mode, so 0x0a is the only recognized line ending. 359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At end of input. Match failed. Backtrack out. 
359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Advance over one char, unless we've hit end-of-line 360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0a) { 360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of line in normal mode. '.' does not match the \n 360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 361750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP: 361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; 361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_FAIL: 362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = FALSE; 362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto breakFromLoop; 362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP_SAV: 362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue < fPattern->fCompiledPat->size()); 362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; // Then JMP. 362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 363050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMP_SAV_X: 363250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This opcode is used with (x)+, when x can match a zero length string. 363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Same as JMP_SAV, except conditional on the match having made forward progress. 363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // data address of the input position at the start of the loop. 
363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stoOp = (int32_t)pat[opValue-1]; 363950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t frameLoc = URX_VAL(stoOp); 364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t prevInputIdx = fp->fExtra[frameLoc]; 364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(prevInputIdx <= fp->fInputIdx); 364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (prevInputIdx < fp->fInputIdx) { 364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The match did make progress. Repeat the loop. 364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; 364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[frameLoc] = fp->fInputIdx; 364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If the input position did not advance, we do nothing here, 365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // execution will fall out of the loop. 
365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_INIT: 365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pick up the three extra operands that CTR_INIT has, and 366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip the pattern location counter past 366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 3; 366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minCount>=0); 366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(maxCount>=minCount || maxCount==-1); 366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(loopLoc>fp->fPatIdx); 367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (minCount == 0) { 367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, loopLoc+1, status); 367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (maxCount == 0) { 367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 
367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_LOOP: 368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Increment the counter. Note: we DIDN'T worry about counter 368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // overflow, since the data comes from UnicodeStrings, which 369050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // stores its length in an int32_t. Do we have to think about 369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this now that we're using UText? Probably not, since the length 369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in UChar32s is still an int32_t. 
369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*pCounter)++; 369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(*pCounter > 0); 369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((uint64_t)*pCounter >= (uint32_t)maxCount) { 369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(*pCounter == maxCount || maxCount == -1); 369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 369950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*pCounter >= minCount) { 370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue + 4; // Loop back. 370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_INIT_NG: 370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Initialize a non-greedy loop 370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Pick up the three extra operands that CTR_INIT has, and 371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // skip the pattern location counter past 371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 3; 371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t 
minCount = (int32_t)pat[instrOperandLoc+1]; 371850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minCount>=0); 372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(maxCount>=minCount || maxCount==-1); 372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(loopLoc>fp->fPatIdx); 372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (minCount == 0) { 372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (maxCount != 0) { 372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CTR_LOOP_NG: 373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Non-greedy {min, max} loops 373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Increment the counter. 
Note: we DIDN'T worry about counter 374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // overflow, since the data comes from UnicodeStrings, which 374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // stores its length in an int32_t. Do we have to think about 374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this now that we're using UText? Probably not, since the length 374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in UChar32s is still an int32_t. 374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*pCounter)++; 374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(*pCounter > 0); 374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((uint64_t)*pCounter >= (uint32_t)maxCount) { 375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop has matched the maximum permitted number of times. 375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Break out of here with no action. Matching will 375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // continue with the following pattern. 375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(*pCounter == maxCount || maxCount == -1); 375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*pCounter < minCount) { 375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We haven't met the minimum number of matches yet. 375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop back for another one. 376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue + 4; // Loop back. 376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We do have the minimum number of matches. 
376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fall into the following pattern, but first do 376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a state save to the top of the loop, so that a failure 376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in the following pattern will try another iteration of the loop. 376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, opValue + 4, status); 376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STO_SP: 377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LD_SP: 377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(newStackSize <= fStack->size()); 378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == (int64_t *)fp) { 378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fFrameSize; i++) { 378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t 
*)fp)[i]; 378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)newFP; 379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKREF: 379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_BACKREF_I: 379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue < fFrameSize); 379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupStartIdx = fp->fExtra[opValue]; 379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupEndIdx = fp->fExtra[opValue+1]; 380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(groupStartIdx <= groupEndIdx); 380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupStartIdx < 0) { 380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This capture group has not participated in the match thus far, 380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (groupEndIdx == groupStartIdx) { 380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The capture group match was of an empty string. 380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Verified by testing: Perl matches succeed in this case, so 380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // we do too. 
381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx); 381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool haveMatch = (opType == URX_BACKREF ? 381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (0 == utext_compareNativeLimit(fAltInputText, groupEndIdx, fInputText, -1)) : 381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (0 == utext_caseCompareNativeLimit(fAltInputText, groupEndIdx, fInputText, -1, U_FOLD_CASE_DEFAULT, &status))); 381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > fActiveLimit) { 382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (!haveMatch) { 382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx == fActiveLimit) { 382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 
382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_STO_INP_LOC: 383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue >= 0 && opValue < fFrameSize); 383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[opValue] = fp->fInputIdx; 383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_JMPX: 384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx += 1; 384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize); 384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t savedInputIdx = fp->fExtra[dataLoc]; 384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(savedInputIdx <= fp->fInputIdx); 384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (savedInputIdx < fp->fInputIdx) { 384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = opValue; // JMP 385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 
385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LA_START: 385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Entering a lookahead block. 385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save Stack Ptr, Input Pos. 386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+1] = fp->fInputIdx; 386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = fLookStart; // Set the match region change for 386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fLookLimit; // transparent bounds. 386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LA_END: 386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Leaving a look-ahead block. 387150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // restore Stack Ptr, Input Pos to positions they had on entry to block. 
387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackSize = fStack->size(); 387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize =(int32_t)fData[opValue]; 387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackSize >= newStackSize); 387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (stackSize > newStackSize) { 387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Copy the current top frame back to the new (cut back) top frame. 387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This makes the capture groups from within the look-ahead 387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // expression available. 388050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t i; 388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (i=0; i<fFrameSize; i++) { 388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)newFP; 388650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = fData[opValue+1]; 388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Restore the active region bounds in the input string; they may have 389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // been changed because of transparent bounds on a Region. 
389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveStart = fRegionStart; 389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fRegionLimit; 389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_ONECHAR_I: 389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx < fActiveLimit) { 389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_PREVIOUS32(fInputText); 
391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex) { 391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c == U_SENTINEL) { 392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case 
URX_STRING_I: 394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Test input against a literal string. 394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Strings require two slots in the compiled pattern, one for the 394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // offset to the string text, and one for the length. 394727f654740f2a26ad62a5c155af9199af9e69b889claireho const UCaseProps *csp = ucase_getSingleton(); 394827f654740f2a26ad62a5c155af9199af9e69b889claireho { 394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stringStartIdx, stringLen; 395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho stringStartIdx = opValue; 395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue = URX_VAL(op); 395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opType == URX_STRING_LEN); 395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho stringLen = opValue; 395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternChars = litText+stringStartIdx; 396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternEnd = patternChars+stringLen; 396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *foldChars = NULL; 396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t foldOffset, foldLength; 396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldOffset = foldLength = 0; 396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = TRUE; 
396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (patternChars < patternEnd && success) { 397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldOffset < foldLength) { 397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != U_SENTINEL) { 397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT); 397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength >= 0) { 397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings 397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldOffset = 0; 398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = foldLength; 398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = foldOffset; // to avoid reading chars from the folding buffer 398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = FALSE; 
399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != U_SENTINEL && (fp->fInputIdx <= fActiveLimit)) { 399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == c); 399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 1; 399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (patternChars+1 < patternEnd) { 399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c)); 399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 2; 399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; // TODO: See ticket 6074 400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size()) { 400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset to last start point 401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = litText+stringStartIdx; 401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Search backwards for a possible start 
401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_PREVIOUS32(fInputText); 401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == U_SENTINEL) { 401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT); 402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength >= 0) { 402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings 402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldOffset = 0; 402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = foldLength; 402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = foldOffset; // to avoid reading chars from the folding buffer 402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((U_IS_BMP(c) && *patternChars == c) || 403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c))) { 403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex); 403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // And try again 
404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_START: 405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Entering a look-behind block. 406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save Stack Ptr, Input Pos. 406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: implement transparent bounds. 
Ticket #6067 406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue] = fStack->size(); 406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+1] = fp->fInputIdx; 406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Init the variable containing the start index for attempted matches. 406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+2] = -1; 406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save input string length, then reset to pin any matches to end at 406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the current position. 407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fData[opValue+3] = fActiveLimit; 407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = fp->fInputIdx; 407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_CONT: 407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Positive Look-Behind, at top of loop checking for matches of LB expression 407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at all possible input starting positions. 408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch the min and max possible match lengths. They are the operands 408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of this op in the pattern. 
408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML <= maxML); 408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML >= 0); 408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch (from data) the last input index where a match was attempted. 408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0) { 409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // First time through loop. 409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = fp->fInputIdx - minML; 409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2nd through nth time through the loop. 409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up start position for match by one. 
409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx == 0) { 409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 4101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We have tried all potential match starting points without 410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getting a match. Backtrack out, and out of the 410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look Behind altogether. 411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen >= fActiveLimit); 411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = restoreInputLen; 411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (successful match will fall off the end of the loop.) 
412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-3, status); 412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = *lbStartIdx; 412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LB_END: 412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of a look-behind block, after a successful match. 412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fActiveLimit) { 413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The look-behind expression matched, but the match did not 413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // extend all the way to the point that we are looking behind from. 413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FAIL out of here, which will take us back to the LB_CONT, which 413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will retry the match starting at another position or fail 413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the look-behind altogether, whichever is appropriate. 413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-behind match is good. Restore the orignal input string length, 414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // which had been truncated to pin the end of the lookbehind match to the 414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // position being looked-behind. 
414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen >= fActiveLimit); 414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = originalInputLen; 414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LBN_CONT: 415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Negative Look-Behind, at top of loop checking for matches of LB expression 415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // at all possible input starting positions. 415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch the extra parameters of this op. 415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continueLoc = URX_VAL(continueLoc); 416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML <= maxML); 416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(minML >= 0); 416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(continueLoc > fp->fPatIdx); 416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fetch (from data) the last input index where a match was attempted. 
416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0) { 416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // First time through loop. 416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = fp->fInputIdx - minML; 417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // 2nd through nth time through the loop. 417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Back up start position for match by one. 417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx == 0) { 417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx); 4177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho (void)UTEXT_PREVIOUS32(fInputText); 417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText); 417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We have tried all potential match starting points without 418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // getting a match, which means that the negative lookbehind as 418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a whole has succeeded. 
Jump forward to the continue location 418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen >= fActiveLimit); 418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = restoreInputLen; 419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx = continueLoc; 419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (successful match will cause a FAIL out of the loop altogether.) 419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-4, status); 419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = *lbStartIdx; 419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LBN_END: 420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // End of a negative look-behind block, after a successful match. 420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx != fActiveLimit) { 420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The look-behind expression matched, but the match did not 420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // extend all the way to the point that we are looking behind from. 
420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FAIL out of here, which will take us back to the LB_CONT, which 420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // will retry the match starting at another position or succeed 421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the look-behind altogether, whichever is appropriate. 421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Look-behind expression matched, which means look-behind test as 421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // a whole Fails 421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Restore the orignal input string length, which had been truncated 421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // inorder to pin the end of the lookbehind match 422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // to the position being looked-behind. 422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen >= fActiveLimit); 422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fActiveLimit = originalInputLen; 422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Restore original stack position, discarding any state saved 422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // by the successful pattern match. 
422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fStack->size() > newStackSize); 423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fStack->setSize(newStackSize); 423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // FAIL, which will take control back to someplace 423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // prior to entering the look-behind test. 423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_SR_I: 424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop Initialization for the optimized implementation of 424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // [some character set]* 424350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This op scans through all matching input. 424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following LOOP_C op emulates stack unwinding if the following pattern fails. 
424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 424950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through input, until either the input is exhausted or 425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // we reach a character that is not a member of the set. 425250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t ix = fp->fInputIdx; 425350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, ix); 425450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix >= fActiveLimit) { 425650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 426250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 426350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 426550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) == FALSE) { 426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 426850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = UTEXT_GETNATIVEINDEX(fInputText); 427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
427150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If there were no matching characters, skip over the loop altogether. 427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop doesn't run at all, a * op always succeeds. 427450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix == fp->fInputIdx) { 427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // skip the URX_LOOP_C op. 427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 427750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 427950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Peek ahead in the compiled pattern, to the URX_LOOP_C that 428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // must follow. It's operand is the stack location 428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that holds the starting input index for the match of this [set]* 428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 428350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackLoc = URX_VAL(loopcOp); 428550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 428650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[stackLoc] = fp->fInputIdx; 428750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 428850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fInputIdx; 428950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 429050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = ix; 429150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save State to the URX_LOOP_C op that follows this one, 429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // so that match failures in 
the following code will return to there. 429450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 429550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 429650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 429850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 429950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 430050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_DOT_I: 430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop Initialization for the optimized implementation of .* 430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // This op scans through all remaining input. 430450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following LOOP_C op emulates stack unwinding if the following pattern fails. 430550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 430650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Loop through input until the input is exhausted (we reach an end-of-line) 430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In DOTALL mode, we can just go straight to the end of the input. 430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t ix; 430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((opValue & 1) == 1) { 431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dot-matches-All mode. Jump straight to the end of the string. 431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = fActiveLimit; 431250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 431350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 431450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // NOT DOT ALL mode. Line endings do not match '.' 431550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Scan forward until a line ending or end of input. 
431650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = fp->fInputIdx; 431750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, ix); 431850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 431950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix >= fActiveLimit) { 432050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 432150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 432250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 432350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = UTEXT_NEXT32(fInputText); 432427f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 432527f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c == 0x0a) || // 0x0a is newline in both modes. 432627f654740f2a26ad62a5c155af9199af9e69b889claireho (((opValue & 2) == 0) && // IF not UNIX_LINES mode 432727f654740f2a26ad62a5c155af9199af9e69b889claireho (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028 || c==0x2029) { 432850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // char is a line ending. Exit the scanning loop. 432950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 433050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 433150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 433250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = UTEXT_GETNATIVEINDEX(fInputText); 433350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 433450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 433550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 433650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If there were no matching characters, skip over the loop altogether. 433750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop doesn't run at all, a * op always succeeds. 433850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (ix == fp->fInputIdx) { 433950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // skip the URX_LOOP_C op. 
434050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 434150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 434250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 434350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Peek ahead in the compiled pattern, to the URX_LOOP_C that 434450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // must follow. It's operand is the stack location 434550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // that holds the starting input index for the match of this .* 434650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 434750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 434850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stackLoc = URX_VAL(loopcOp); 434950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 435050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fExtra[stackLoc] = fp->fInputIdx; 435150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 435250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fInputIdx; 435350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 435450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = ix; 435550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 435650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Save State to the URX_LOOP_C op that follows this one, 435750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // so that match failures in the following code will return to there. 435850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
435950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 436050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 436150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 436250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 436350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 436450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 436550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_LOOP_C: 436650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 436750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue>=0 && opValue<fFrameSize); 436850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fExtra[opValue]; 436950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(backSearchIndex <= fp->fInputIdx); 437050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (backSearchIndex == fp->fInputIdx) { 437150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We've backed up the input idx to the point that the loop started. 437250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The loop is done. Leave here without saving state. 437350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Subsequent failures won't come back here. 437450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 437550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 437650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Set up for the next iteration of the loop, with input index 437750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // backed up by one from the last time through, 437850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // and a state save to this instruction in case the following code fails again. 437950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (We're going backwards because this loop emulates stack unwinding, not 438050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the initial scan forward.) 
438150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx > 0); 438250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 438350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevC = UTEXT_PREVIOUS32(fInputText); 438450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 438550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 438650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText); 438750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (prevC == 0x0a && 438850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx > backSearchIndex && 438950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho twoPrevC == 0x0d) { 439050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 439150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 439250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // .*, stepping back over CRLF pair. 439350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); 439450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 439550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 439850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx-1, status); 439950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 440050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 440350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 440450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho default: 440550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Trouble. 
The compiled pattern contains an entry with an 440650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unrecognized type tag. 440750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(FALSE); 4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 440950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 441050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 441150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isMatch = FALSE; 4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 441650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehobreakFromLoop: 441750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatch = isMatch; 441850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (isMatch) { 441950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fLastMatchEnd = fMatchEnd; 442050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchStart = startIdx; 442150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fMatchEnd = fp->fInputIdx; 442250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) { 442350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_RUN_DEBUG_PRINTF(("Match. 
start=%d end=%d\n\n", fMatchStart, fMatchEnd)); 4424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 442650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho else 442750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fTraceDebug) { 442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); 4430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 443350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fFrame = fp; // The active stack frame when the engine stopped. 443450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Contains the capture group results that we need to 443550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // access later. 443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 4437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 4438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 4440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 444250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// MatchChunkAt This is the actual matching engine. Like MatchAt, but with the 444350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// assumption that the entire string is available in the UText's 444450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// chunk buffer. For now, that means we can use int32_t indexes, 444550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// except for anything that needs to be saved (like group starts 444650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// and ends). 
4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 4448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// startIdx: begin matching at this index. 4449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// toEnd: if true, match must extend to end of the input region 4450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru// 4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------------- 445250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) { 4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isMatch = FALSE; // True if the we have a match. 445450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 445550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards 4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t op; // Operation from the compiled pattern, split into 4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t opType; // the opcode 4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t opValue; // and the operand value. 
446050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 446150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG 4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fTraceDebug) 4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 446450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("MatchAt(startIdx=%ld)\n", startIdx); 4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Original Pattern: "); 446650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = utext_next32From(fPattern->fPattern, 0); 446750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 446850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<32 || c>256) { 446950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = '.'; 447050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 447150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REGEX_DUMP_DEBUG_PRINTF(("%c", c)); 447250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 447350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fPattern->fPattern); 4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Input String: "); 447750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = utext_next32From(fInputText, 0); 447850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (c != U_SENTINEL) { 4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c<32 || c>256) { 4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = '.'; 4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%c", c); 448350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 448450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = UTEXT_NEXT32(fInputText); 4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 448950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 449050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 4492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 449450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cache frequently referenced items from the compiled pattern 4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pat = fPattern->fCompiledPat->getBuffer(); 449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *litText = fPattern->fLiteralText.getBuffer(); 4500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector *sets = fPattern->fSets; 450150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 450250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *inputBuf = fInputText->chunkContents; 450350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFrameSize = fPattern->fFrameSize; 4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REStackFrame *fp = resetStack(); 450650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = 0; 4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = startIdx; 450950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Zero out the pattern's static data 
4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i<fPattern->fDataSize; i++) { 4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[i] = 0; 4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 451550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Main loop for interpreting the compiled pattern. 4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // One iteration of the loop per pattern operation performed. 4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 4521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 4522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (_heapchk() != _HEAPOK) { 4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "Heap Trouble\n"); 4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 452650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 452750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 4528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opType = URX_TYPE(op); 4529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opValue = URX_VAL(op); 453050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG 4531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fTraceDebug) { 453250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx); 453350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho printf("inputIdx=%d inputChar=%x sp=%3d activeLimit=%d ", fp->fInputIdx, 453450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit); 4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fPattern->dumpOp(fp->fPatIdx); 4536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 453750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 4538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 453950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (opType) { 454150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 454250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_NOP: 4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKTRACK: 4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Force a backtrack. In some circumstances, the pattern compiler 4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will notice that the pattern can't possibly match anything, and will 4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // emit one of these at that point. 
4551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 455350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 455450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR: 4556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fActiveLimit) { 455750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == opValue) { 4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 456650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 456750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 456850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 456950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 457050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 457150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 457250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 457350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 
457450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == opValue) { 457550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 457650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 457750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex); 457850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == opValue) { 457950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 458050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 458150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 458250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 458350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 458450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 458550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 458650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 458750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 458850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 458950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 459050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 459250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 459350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 459450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 459550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STRING: 4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input against a literal string. 
4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings require two slots in the compiled pattern, one for the 4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // offset to the string text, and one for the length. 4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringStartIdx = opValue; 4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringLen; 460350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 460450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; // Fetch the second operand 4605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opType = URX_TYPE(op); 4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringLen = URX_VAL(op); 4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opType == URX_STRING_LEN); 4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(stringLen >= 2); 461050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx + stringLen > fActiveLimit) { 4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No match. String is longer than the remaining input text. 
4613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; // TODO: See ticket 6074 4614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 461750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pInp = inputBuf + fp->fInputIdx; 4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pPat = litText+stringStartIdx; 4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar * pEnd = pInp + stringLen; 462150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pInp == *pPat) { 4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pInp++; 4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pPat++; 4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pInp == pEnd) { 4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Successful Match. 462850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Match failed. 
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 463650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 463750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 463850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx += stringLen; 463950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 464050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 464150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size()) { 464250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 464350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset to last start point 464550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 464650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 464750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pPat = litText+stringStartIdx; 464850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 464950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Search backwards for a possible start 465050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 465150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 465250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((U_IS_BMP(c) && *pPat == c) || 465350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*pPat == U16_LEAD(c) && *(pPat+1) == U16_TRAIL(c))) { 465450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 465550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 465650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
465750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex); 465850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 465950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // And try again 466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 466450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 466550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 466650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 466750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 466950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 467050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 467150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 467250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 467350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 467650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 467750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATE_SAVE: 4679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, opValue, status); 4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 468150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 468250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END: 
4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The match loop will exit via this path on a successful match, 4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // when we reach the end of the pattern. 4686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (toEnd && fp->fInputIdx != fActiveLimit) { 4687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // The pattern matched, but not to the end of input. Try some more. 4688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isMatch = TRUE; 4692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto breakFromLoop; 469350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 469450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Start and End Capture stack frame variables are laid out out like this: 4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fp->fExtra[opValue] - The start of a completed capture group 4696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // opValue+1 - The end of a completed capture group 4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // opValue+2 - the start of a capture group whose end 4698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // has not yet been reached (and might not ever be). 
4699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_START_CAPTURE: 4700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 4701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue+2] = fp->fInputIdx; 4702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 470350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 470450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_END_CAPTURE: 4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-3); 4707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fExtra[opValue+2] >= 0); // Start pos for this group must be set. 4708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = fp->fExtra[opValue+2]; // Tentative start becomes real. 4709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue+1] = fp->fInputIdx; // End position 4710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]); 4711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 471250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 471350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOLLAR: // $, test for End of line 471550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // or for position before new line at end of input 4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fAnchorLimit-2) { 4717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are no where near the end of input. Fail. 4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This is the common case. Keep it first. 
4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fAnchorLimit) { 4723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We really are at the end of input. Success. 4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 472850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we are positioned just before a new-line that is located at the 4730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // end of input, succeed. 4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx == fAnchorLimit-1) { 473250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 473350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c); 473450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) { 4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 4737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At new-line at end of input. 
Success 4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 474350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (fp->fInputIdx == fAnchorLimit-2 && 474450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) { 4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // At CR/LF at end of input. Success 4748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 474950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 475150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 475350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 475450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 475550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_D: // $, test for End of Line, in UNIX_LINES mode. 4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fAnchorLimit-1) { 4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Either at the last character of input, or off the end. 4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx == fAnchorLimit-1) { 4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At last char of input. Success if it's a new line. 
476050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] == 0x0a) { 4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Off the end of input. Success. 4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 477250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Not at end of input. Back-track out. 4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 477650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 477750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 477850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_M: // $, test for End of line in multi-line mode 477950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 478050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 478150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 
478250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 478350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; 478450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 478550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 478650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are positioned just before a new-line, succeed. 478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 478850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c = inputBuf[fp->fInputIdx]; 478950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) { 479050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // At a line end, except for the odd chance of being in the middle of a CR/LF sequence 479150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // In multi-line mode, hitting a new-line just before the end of input does not 479250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // set the hitEnd or requireEnd flags 479350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) { 4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 479550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 479750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // not at a new line. Fail. 
479850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 479950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 480050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 480150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 480250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 480350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_DOLLAR_MD: // $, test for End of line in multi-line and UNIX_LINES mode 480450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 480550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fAnchorLimit) { 480650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We really are at the end of input. Success. 480750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 480850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fRequireEnd = TRUE; // Java set requireEnd in this case, even though 480950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; // adding a new-line would not lose the match. 481050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 481150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // If we are not positioned just before a new-line, the test fails; backtrack out. 481250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It makes no difference where the new-line is within the input. 
481350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] != 0x0a) { 481450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 481550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 481650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 481750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 481850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 481950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 482050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET: // ^, test for start of line 4821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fAnchorStart) { 4822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 4824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 482550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 482650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 482750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M: // ^, test for start of line in mulit-line mode 482850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 482950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx == fAnchorStart) { 483050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 
483150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 483250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 483350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 483450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // unless we are at the end of input 483550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c = inputBuf[fp->fInputIdx - 1]; 483650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((fp->fInputIdx < fAnchorLimit) && 483750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 483850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // It's a new-line. ^ is true. Success. 483950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // TODO: what should be done with positions between a CR and LF? 484050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 484150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 484250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Fail. 484350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 484450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 484550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 484650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 484750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 484850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho case URX_CARET_M_UNIX: // ^, test for start of line in mulit-line + Unix-line mode 484950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 485050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx >= fAnchorStart); 485150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx <= fAnchorStart) { 485250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We are at the start input. Success. 
485350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 485450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 485550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Check whether character just before the current pos is a new-line 485650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(fp->fInputIdx <= fAnchorLimit); 485750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar c = inputBuf[fp->fInputIdx - 1]; 485850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c != 0x0a) { 485950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Not at the start of a line. Back-track out. 486050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 486150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 486250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 486350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 486450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_B: // Test for word boundaries 4866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 486750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx); 4868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success ^= (opValue != 0); // flip sense for \B 4869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 4870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 487450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 487550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_BU: // Test for word boundaries, Unicode-style 
4877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool success = isUWordBoundary(fp->fInputIdx); 4879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success ^= (opValue != 0); // flip sense for \B 4880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 488550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 488650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_D: // Test for decimal digit 4888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 4889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 4891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 4893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 489450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 489550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 489650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int8_t ctype = u_charType(c); // TODO: make a unicode set for this. Will be faster. 
4898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER); 4899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success ^= (opValue != 0); // flip sense for \D 490050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 4901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_G: // Test for position at end of previous match 490827f654740f2a26ad62a5c155af9199af9e69b889claireho if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) { 4909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 4910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 4911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 491350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKSLASH_X: 491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Match a Grapheme, as defined by Unicode TR 29. 491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Differs slightly from Perl, which consumes combining marks independently 491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // of context. 
491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 4919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Fail if at end of input 492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Examine (and consume) the current char. 492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Dispatch into a little state machine, based on the char. 492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet **sets = fPattern->fStaticSets; 493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_NORMAL]->contains(c)) goto GC_Extend; 493350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control; 493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 493750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
4941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_L: 494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_L]->contains(c)) goto GC_L; 494750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LV]->contains(c)) goto GC_V; 494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_LVT]->contains(c)) goto GC_T; 494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_V: 495450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_V]->contains(c)) goto GC_V; 495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_T: 496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) goto GC_Done; 496350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_T]->contains(c)) goto GC_T; 496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, c); 496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Extend; 4967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Extend: 496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Combining characters are consumed here 497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for (;;) { 497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 4973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (sets[URX_GC_EXTEND]->contains(c) == FALSE) { 497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, 0, fp->fInputIdx); 497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 498050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho goto GC_Done; 4981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Control: 498350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Most control chars stand alone (don't combine with combining chars), 498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // except for that CR/LF sequence is a single grapheme cluster. 
498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) { 498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx++; 498750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Done: 499050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 499450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 499650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case URX_BACKSLASH_Z: // Test for end of Input 5000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fAnchorLimit) { 5001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fRequireEnd = TRUE; 5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STATIC_SETREF: 5011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 
5012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input character against one of the predefined sets 5013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Word Characters, for example) 5014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The high bit of the op value is a flag for the match polarity. 5015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 0: success if input char is in set. 5016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1: success if input char is not in set. 5017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET); 5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru opValue &= ~URX_NEG_SET; 5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 5028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 256) { 5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c)) { 5032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success = !success; 
5033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet *s = fPattern->fStaticSets[opValue]; 5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c)) { 5037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru success = !success; 5038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!success) { 504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 504250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 504650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried 504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = ((opValue & URX_NEG_SET) == URX_NEG_SET); // reset 504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 505150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 
505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = !success; 506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 506150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex && !success); 506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 507050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 507250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 5076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
508150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STAT_SETREF_N: 5083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input character for NOT being a member of one of 5085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the predefined sets (Word Characters, for example) 5086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < URX_LAST_SET); 509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 5095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < 256) { 5097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 5098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c) == FALSE) { 5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet *s = fPattern->fStaticSets[opValue]; 5103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c) == 
FALSE) { 5104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 511050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 511350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried 511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue]; 512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c) == FALSE) { 512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet *s = fPattern->fStaticSets[opValue]; 512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c) 
== FALSE) { 512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex); 513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 513350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 513450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 513550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 513750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 514150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 514250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 514350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 514450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 5145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 514950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_SETREF: 515150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { 515250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx >= fActiveLimit) { 515350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; 
515450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 5156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 515750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 515850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opValue > 0 && opValue < sets->size()); 515950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 516050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // There is input left. Pick up one char and test it for set membership. 516150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 516250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 516350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c<256) { 516450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 516550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 516650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 516750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 516850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 516950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 517050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 517150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 517250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The character is in the set. A Match. 517350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 517450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 517550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 517650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 517750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the character wasn't in the set. 
517850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 517950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 518050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 518150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 518250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Try to find it, backwards 518350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 518450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried 518550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 518650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 518750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 518850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (c < 256) { 518950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 519050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s8->contains(c)) { 519150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 519250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 519350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 519450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 519550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 519650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (s->contains(c)) { 519750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 519850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 519950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 520050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
520150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex); 520250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 520350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 520450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 520550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 520650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > reverseIndex) { 520750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 520850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 520950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 521050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 521150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 521250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 521450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 521550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 521850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 521950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY: 5221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // . matches anything, but stops at end-of-line. 5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 5224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 
5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 522950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is input left. Advance over one char, unless we've hit end-of-line 523150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 5232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((c & 0x7f) <= 0x29) && // First quickly bypass as many chars as possible 5234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) { 5235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of line in normal mode. . does not match. 523650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 524150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 524250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_DOTANY_ALL: 5244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 524550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // . 
in dot-matches-all (including new lines) mode 5246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 5247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 5248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 525250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is input left. Advance over one char, except if we are 5254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at a cr/lf, advance over both of them. 5255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 5256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c==0x0d && fp->fInputIdx < fActiveLimit) { 5258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In the case of a CR/LF, we need to advance over both. 
525950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (inputBuf[fp->fInputIdx] == 0x0a) { 526050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit); 5261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 526550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 526650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru case URX_DOTANY_UNIX: 5268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // '.' operator, matches all, but stops at end-of-line. 5270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // UNIX_LINES mode, so 0x0a is the only recognized line ending. 5271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx >= fActiveLimit) { 5272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // At end of input. Match failed. Backtrack out. 5273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 527750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // There is input left. 
Advance over one char, unless we've hit end-of-line 527950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 5280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (c == 0x0a) { 5282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // End of line in normal mode. '.' does not match the \n 5283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 528750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 528850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP: 5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; 5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 529250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_FAIL: 5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isMatch = FALSE; 5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto breakFromLoop; 529650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV: 5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue < fPattern->fCompiledPat->size()); 5299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 5300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp->fPatIdx = opValue; // Then JMP. 
5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 530250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMP_SAV_X: 5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This opcode is used with (x)+, when x can match a zero length string. 5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Same as JMP_SAV, except conditional on the match having made forward progress. 5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the 5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // data address of the input position at the start of the loop. 5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size()); 531050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stoOp = (int32_t)pat[opValue-1]; 5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC); 5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t frameLoc = URX_VAL(stoOp); 5313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize); 531450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc]; 5315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(prevInputIdx <= fp->fInputIdx); 5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prevInputIdx < fp->fInputIdx) { 5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The match did make progress. Repeat the loop. 
5318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); // State save to loc following current 5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; 5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[frameLoc] = fp->fInputIdx; 5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the input position did not advance, we do nothing here, 5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // execution will fall out of the loop. 5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 532650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT: 5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 5330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 533150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pick up the three extra operands that CTR_INIT has, and 5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip the pattern location counter past 533450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 3; 5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 533750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 533850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 
5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minCount>=0); 5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(maxCount>=minCount || maxCount==-1); 5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(loopLoc>fp->fPatIdx); 534250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (minCount == 0) { 5344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, loopLoc+1, status); 5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxCount == 0) { 5347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 535150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP: 5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 535550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT); 535750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 535850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 535950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 536050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Increment the counter. 
Note: we DIDN'T worry about counter 5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // overflow, since the data comes from UnicodeStrings, which 536250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // stores its length in an int32_t. Do we have to think about 536350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this now that we're using UText? Probably not, since the length 536450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in UChar32s is still an int32_t. 5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*pCounter)++; 5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(*pCounter > 0); 536750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((uint64_t)*pCounter >= (uint32_t)maxCount) { 5368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(*pCounter == maxCount || maxCount == -1); 5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pCounter >= minCount) { 5372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue + 4; // Loop back. 
5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 537750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_INIT_NG: 5379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Initialize a non-greedy loop 5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize-2); 5382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = 0; // Set the loop counter variable to zero 538350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pick up the three extra operands that CTR_INIT has, and 5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip the pattern location counter past 538650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 3; 5388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t loopLoc = URX_VAL(pat[instrOperandLoc]); 538950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[instrOperandLoc+1]; 539050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[instrOperandLoc+2]; 5391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minCount>=0); 5392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(maxCount>=minCount || maxCount==-1); 5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(loopLoc>fp->fPatIdx); 539450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (minCount == 0) { 5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxCount != 0) { 
5397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = loopLoc+1; // Continue with stuff after repeated block 5400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 540350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_CTR_LOOP_NG: 5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Non-greedy {min, max} loops 5407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2); 540850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t initOp = (int32_t)pat[opValue]; 5409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG); 541050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)]; 541150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minCount = (int32_t)pat[opValue+2]; 541250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxCount = (int32_t)pat[opValue+3]; 541350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Increment the counter. Note: we DIDN'T worry about counter 5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // overflow, since the data comes from UnicodeStrings, which 541550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // stores its length in an int32_t. Do we have to think about 541650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this now that we're using UText? Probably not, since the length 541750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // in UChar32s is still an int32_t. 
5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*pCounter)++; 5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(*pCounter > 0); 542050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 542150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((uint64_t)*pCounter >= (uint32_t)maxCount) { 5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop has matched the maximum permitted number of times. 5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Break out of here with no action. Matching will 5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // continue with the following pattern. 5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(*pCounter == maxCount || maxCount == -1); 5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 542850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*pCounter < minCount) { 5430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We haven't met the minimum number of matches yet. 5431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop back for another one. 5432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue + 4; // Loop back. 5433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We do have the minimum number of matches. 5435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fall into the following pattern, but first do 5436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a state save to the top of the loop, so that a failure 5437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the following pattern will try another iteration of the loop. 
5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, opValue + 4, status); 5439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 544250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_SP: 5444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 544750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LD_SP: 5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize); 545150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(newStackSize <= fStack->size()); 545350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 545450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (newFP == (int64_t *)fp) { 5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<fFrameSize; i++) { 545950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 5460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp = (REStackFrame 
*)newFP; 5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 546550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF: 5467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_BACKREF_I: 5468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue < fFrameSize); 547050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupStartIdx = fp->fExtra[opValue]; 547150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t groupEndIdx = fp->fExtra[opValue+1]; 5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(groupStartIdx <= groupEndIdx); 547350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t len = groupEndIdx-groupStartIdx; 5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (groupStartIdx < 0) { 5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This capture group has not participated in the match thus far, 5476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len == 0) { 5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The capture group match was of an empty string. 5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Verified by testing: Perl matches succeed in this case, so 5482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we do too. 
5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool haveMatch = FALSE; 5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx + len <= fActiveLimit) { 5488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (opType == URX_BACKREF) { 548950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_strncmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, (int32_t)len) == 0) { 5490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru haveMatch = TRUE; 5491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u_strncasecmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, 549450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (int32_t)len, U_FOLD_CASE_DEFAULT) == 0) { 5495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru haveMatch = TRUE; 5496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: probably need to do a partial string comparison, and only 5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // set HitEnd if the available input matched. Ticket #6074 5501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (haveMatch) { 5504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx += len; // Match. Advance current input position. 
5505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no match. 5507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 551050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_STO_INP_LOC: 5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue >= 0 && opValue < fFrameSize); 5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[opValue] = fp->fInputIdx; 5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 551750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_JMPX: 5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 552050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t instrOperandLoc = (int32_t)fp->fPatIdx; 5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx += 1; 5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t dataLoc = URX_VAL(pat[instrOperandLoc]); 5523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize); 552450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t savedInputIdx = (int32_t)fp->fExtra[dataLoc]; 5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(savedInputIdx <= fp->fInputIdx); 5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (savedInputIdx < fp->fInputIdx) { 5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = opValue; // JMP 
5528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 552950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); // FAIL, no progress in loop. 5530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 553350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_START: 5535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Entering a lookahead block. 5537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save Stack Ptr, Input Pos. 5538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+1] = fp->fInputIdx; 5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = fLookStart; // Set the match region change for 5542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fLookLimit; // transparent bounds. 5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 554550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LA_END: 5547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Leaving a look-ahead block. 5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // restore Stack Ptr, Input Pos to positions they had on entry to block. 
5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackSize = fStack->size(); 555250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(stackSize >= newStackSize); 5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stackSize > newStackSize) { 5555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Copy the current top frame back to the new (cut back) top frame. 5556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This makes the capture groups from within the look-ahead 5557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // expression available. 555850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; 5559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 5560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (i=0; i<fFrameSize; i++) { 556150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho newFP[i] = ((int64_t *)fp)[i]; 5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp = (REStackFrame *)newFP; 5564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = fData[opValue+1]; 556750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Restore the active region bounds in the input string; they may have 5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // been changed because of transparent bounds on a Region. 
5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveStart = fRegionStart; 5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fRegionLimit; 5572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 557450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_ONECHAR_I: 5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx < fActiveLimit) { 557750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 5578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 5579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 5580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 5583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 558550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 558650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 558750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) { 558850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 558950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 559050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = FALSE; 559150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = fp->fInputIdx; 559250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 
559350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (reverseIndex > backSearchIndex) { 559450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 559550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) { 559650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 559750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 559850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (c == U_SENTINEL) { 559950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 560050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 560150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 560250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 560350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = FALSE; 560450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 560550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 560650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 560750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 560850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 560950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 561050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 561150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 561250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 561350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 561450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 561550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 561850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru case URX_STRING_I: 5620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test input against a literal string. 5622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings require two slots in the compiled pattern, one for the 5623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // offset to the string text, and one for the length. 562427f654740f2a26ad62a5c155af9199af9e69b889claireho const UCaseProps *csp = ucase_getSingleton(); 562527f654740f2a26ad62a5c155af9199af9e69b889claireho { 562650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t stringStartIdx, stringLen; 562750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho stringStartIdx = opValue; 5628b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 562950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho op = (int32_t)pat[fp->fPatIdx]; 563050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; 563150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opType = URX_TYPE(op); 563250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho opValue = URX_VAL(op); 563350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(opType == URX_STRING_LEN); 563450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho stringLen = opValue; 5635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 563650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternChars = litText+stringStartIdx; 563750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *patternEnd = patternChars+stringLen; 5638b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 563950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *foldChars = NULL; 564050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t foldOffset, foldLength; 564150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 56420fa67b93b831c6636ca18b152a1b1b14cc99b034claireho UBool c_is_valid = FALSE; 5643b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 
564450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 564550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t originalInputIdx = fp->fInputIdx; 564650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 564750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool success = TRUE; 5648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 564950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldOffset = foldLength = 0; 565050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 565150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while (patternChars < patternEnd && success) { 56520fa67b93b831c6636ca18b152a1b1b14cc99b034claireho if (fp->fInputIdx < fActiveLimit) { // don't read past end of string 56530fa67b93b831c6636ca18b152a1b1b14cc99b034claireho if(foldOffset < foldLength) { 56540fa67b93b831c6636ca18b152a1b1b14cc99b034claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 56550fa67b93b831c6636ca18b152a1b1b14cc99b034claireho c_is_valid = TRUE; 56560fa67b93b831c6636ca18b152a1b1b14cc99b034claireho } else { 56570fa67b93b831c6636ca18b152a1b1b14cc99b034claireho // test pre-condition of U16_NEXT: i < length 56580fa67b93b831c6636ca18b152a1b1b14cc99b034claireho U_ASSERT(fp->fInputIdx < fActiveLimit); 56590fa67b93b831c6636ca18b152a1b1b14cc99b034claireho U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c); 56600fa67b93b831c6636ca18b152a1b1b14cc99b034claireho c_is_valid = TRUE; 56610fa67b93b831c6636ca18b152a1b1b14cc99b034claireho foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT); 56620fa67b93b831c6636ca18b152a1b1b14cc99b034claireho if(foldLength >= 0) { 56630fa67b93b831c6636ca18b152a1b1b14cc99b034claireho if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings 56640fa67b93b831c6636ca18b152a1b1b14cc99b034claireho foldOffset = 0; 56650fa67b93b831c6636ca18b152a1b1b14cc99b034claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 56660fa67b93b831c6636ca18b152a1b1b14cc99b034claireho } 
else { 56670fa67b93b831c6636ca18b152a1b1b14cc99b034claireho c = foldLength; 56680fa67b93b831c6636ca18b152a1b1b14cc99b034claireho foldLength = foldOffset; // to avoid reading chars from the folding buffer 56690fa67b93b831c6636ca18b152a1b1b14cc99b034claireho } 567050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 567150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56720fa67b93b831c6636ca18b152a1b1b14cc99b034claireho } else { 5673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho c_is_valid = FALSE; 567450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5675b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 56760fa67b93b831c6636ca18b152a1b1b14cc99b034claireho if (fp->fInputIdx <= fActiveLimit && c_is_valid) { 567750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_IS_BMP(c)) { 567850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == c); 567950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 1; 568050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if (patternChars+1 < patternEnd) { 568150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c)); 568250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars += 2; 568350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 568450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 568550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = FALSE; 568650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fHitEnd = TRUE; // TODO: See ticket 6074 568750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 568850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 569050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (!success) { 569150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 569250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex && fStack->size()) { 
569350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize); 569450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) { 569550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Reset to last start point 569650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t reverseIndex = originalInputIdx; 569750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho patternChars = litText+stringStartIdx; 5698b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 569950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Search backwards for a possible start 570050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 570150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, backSearchIndex, reverseIndex, c); 570250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT); 570350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength >= 0) { 570450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(foldLength <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle chars that fold to 0-length strings 570550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldOffset = 0; 570650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_NEXT_UNSAFE(foldChars, foldOffset, c); 570750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 570850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho c = foldLength; 570950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho foldLength = foldOffset; // to avoid reading chars from the folding buffer 571050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 571150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 5712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 571350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if ((U_IS_BMP(c) && *patternChars == c) || 571450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*patternChars == U16_LEAD(c) && 
*(patternChars+1) == U16_TRAIL(c))) { 571550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho success = TRUE; 571650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 571750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 571850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while (reverseIndex > backSearchIndex); 5719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 572050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // And try again 572150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (success) { 572250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 572350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx = reverseIndex; 572450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (fp->fInputIdx > backSearchIndex) { 572550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = StateSave(fp, fp->fPatIdx, status); 572650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 572750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fPatIdx++; // Skip the LOOP_C, we just did that 572850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 572950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 573050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 573150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 573250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 573350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 5736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 573850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_START: 5740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Entering a look-behind block. 
5742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save Stack Ptr, Input Pos. 5743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: implement transparent bounds. Ticket #6067 5744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue] = fStack->size(); 5746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+1] = fp->fInputIdx; 5747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Init the variable containing the start index for attempted matches. 5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fData[opValue+2] = -1; 5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save input string length, then reset to pin any matches to end at 5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the current position. 5751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fData[opValue+3] = fActiveLimit; 5752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = fp->fInputIdx; 5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 575550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 575650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_CONT: 5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Positive Look-Behind, at top of loop checking for matches of LB expression 5760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at all possible input starting positions. 576150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch the min and max possible match lengths. 
They are the operands 5763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of this op in the pattern. 576450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 576550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML <= maxML); 5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML >= 0); 576850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch (from data) the last input index where a match was attempted. 5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 577150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 5772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0) { 5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First time through loop. 5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *lbStartIdx = fp->fInputIdx - minML; 5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2nd through nth time through the loop. 5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up start position for match by one. 
5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx == 0) { 577950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (*lbStartIdx)--; 5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, *lbStartIdx); 5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 578450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 5786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have tried all potential match starting points without 5787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // getting a match. Backtrack out, and out of the 5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look Behind altogether. 5789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 579050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 5791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(restoreInputLen >= fActiveLimit); 579250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 5793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = restoreInputLen; 5794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 579650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 5798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (successful match will fall off the end of the loop.) 
5799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-3, status); 5800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = *lbStartIdx; 5801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 580350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LB_END: 5805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of a look-behind block, after a successful match. 5806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fActiveLimit) { 5809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The look-behind expression matched, but the match did not 5810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // extend all the way to the point that we are looking behind from. 5811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // FAIL out of here, which will take us back to the LB_CONT, which 5812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will retry the match starting at another position or fail 5813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the look-behind altogether, whichever is appropriate. 5814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 581750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look-behind match is good. 
Restore the orignal input string length, 5819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which had been truncated to pin the end of the lookbehind match to the 5820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // position being looked-behind. 582150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 5822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(originalInputLen >= fActiveLimit); 582350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 5824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = originalInputLen; 5825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 582750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 582850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_CONT: 5830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Negative Look-Behind, at top of loop checking for matches of LB expression 5832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at all possible input starting positions. 583350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch the extra parameters of this op. 
583550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t minML = (int32_t)pat[fp->fPatIdx++]; 583650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t maxML = (int32_t)pat[fp->fPatIdx++]; 583750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t continueLoc = (int32_t)pat[fp->fPatIdx++]; 583850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continueLoc = URX_VAL(continueLoc); 5839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML <= maxML); 5840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(minML >= 0); 5841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(continueLoc > fp->fPatIdx); 584250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Fetch (from data) the last input index where a match was attempted. 5844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 584550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t *lbStartIdx = &fData[opValue+2]; 5846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0) { 5847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First time through loop. 5848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *lbStartIdx = fp->fInputIdx - minML; 5849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 2nd through nth time through the loop. 5851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up start position for match by one. 5852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx == 0) { 5853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*lbStartIdx)--; // Because U16_BACK is unsafe starting at 0. 
5854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, *lbStartIdx); 5856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 585850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) { 5860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have tried all potential match starting points without 5861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // getting a match, which means that the negative lookbehind as 5862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a whole has succeeded. Jump forward to the continue location 586350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t restoreInputLen = fData[opValue+3]; 5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(restoreInputLen >= fActiveLimit); 586550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(restoreInputLen <= fInputLength); 5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = restoreInputLen; 5867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx = continueLoc; 5868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 587050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. 5872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (successful match will cause a FAIL out of the loop altogether.) 
5873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-4, status); 5874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = *lbStartIdx; 5875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 587750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LBN_END: 5879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // End of a negative look-behind block, after a successful match. 5880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (fp->fInputIdx != fActiveLimit) { 5883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The look-behind expression matched, but the match did not 5884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // extend all the way to the point that we are looking behind from. 5885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // FAIL out of here, which will take us back to the LB_CONT, which 5886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will retry the match starting at another position or succeed 5887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the look-behind altogether, whichever is appropriate. 
5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 589150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Look-behind expression matched, which means look-behind test as 5893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a whole Fails 5894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restore the orignal input string length, which had been truncated 5896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // inorder to pin the end of the lookbehind match 5897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to the position being looked-behind. 589850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int64_t originalInputLen = fData[opValue+3]; 5899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(originalInputLen >= fActiveLimit); 590050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(originalInputLen <= fInputLength); 5901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fActiveLimit = originalInputLen; 590250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restore original stack position, discarding any state saved 5904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by the successful pattern match. 
5905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize); 590650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t newStackSize = (int32_t)fData[opValue]; 5907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fStack->size() > newStackSize); 5908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fStack->setSize(newStackSize); 5909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // FAIL, which will take control back to someplace 5911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // prior to entering the look-behind test. 5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = (REStackFrame *)fStack->popFrame(fFrameSize); 5913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 591550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 591650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_SR_I: 5918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop Initialization for the optimized implementation of 5919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [some character set]* 5920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This op scans through all matching input. 5921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following LOOP_C op emulates stack unwinding if the following pattern fails. 
5922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(opValue > 0 && opValue < sets->size()); 5924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Regex8BitSet *s8 = &fPattern->fSets8[opValue]; 5925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); 592650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop through input, until either the input is exhausted or 5928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we reach a character that is not a member of the set. 592950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t ix = (int32_t)fp->fInputIdx; 5930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 5931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ix >= fActiveLimit) { 5932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 5936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, ix, fActiveLimit, c); 5937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c<256) { 5938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s8->contains(c) == FALSE) { 5939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, ix); 5940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s->contains(c) == FALSE) { 5944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
U16_BACK_1(inputBuf, 0, ix); 5945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 594950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there were no matching characters, skip over the loop altogether. 5951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop doesn't run at all, a * op always succeeds. 5952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ix == fp->fInputIdx) { 5953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; // skip the URX_LOOP_C op. 5954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 595650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Peek ahead in the compiled pattern, to the URX_LOOP_C that 5958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // must follow. 
It's operand is the stack location 5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that holds the starting input index for the match of this [set]* 596050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 5961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 5962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackLoc = URX_VAL(loopcOp); 5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 5964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[stackLoc] = fp->fInputIdx; 596550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 596650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fInputIdx; 596750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 5968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = ix; 596950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save State to the URX_LOOP_C op that follows this one, 5971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that match failures in the following code will return to there. 5972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 
5973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 5974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 5975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 597750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 597850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 5979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_DOT_I: 5980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop Initialization for the optimized implementation of .* 5981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This op scans through all remaining input. 5982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following LOOP_C op emulates stack unwinding if the following pattern fails. 5983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 5984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Loop through input until the input is exhausted (we reach an end-of-line) 5985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // In DOTALL mode, we can just go straight to the end of the input. 5986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ix; 5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((opValue & 1) == 1) { 5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Dot-matches-All mode. Jump straight to the end of the string. 598950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = (int32_t)fActiveLimit; 5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // NOT DOT ALL mode. Line endings do not match '.' 5993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Scan forward until a line ending or end of input. 
599450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ix = (int32_t)fp->fInputIdx; 5995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (ix >= fActiveLimit) { 5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fHitEnd = TRUE; 5998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 5999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 6001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_NEXT(inputBuf, ix, fActiveLimit, c); // c = inputBuf[ix++] 600227f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c & 0x7f) <= 0x29) { // Fast filter of non-new-line-s 600327f654740f2a26ad62a5c155af9199af9e69b889claireho if ((c == 0x0a) || // 0x0a is newline in both modes. 600427f654740f2a26ad62a5c155af9199af9e69b889claireho (((opValue & 2) == 0) && // IF not UNIX_LINES mode 600527f654740f2a26ad62a5c155af9199af9e69b889claireho ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029))) { 6006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // char is a line ending. Put the input pos back to the 6007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // line ending char, and exit the scanning loop. 6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U16_BACK_1(inputBuf, 0, ix); 6009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru break; 6010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 6011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 601450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there were no matching characters, skip over the loop altogether. 
6016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop doesn't run at all, a * op always succeeds. 6017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ix == fp->fInputIdx) { 6018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; // skip the URX_LOOP_C op. 6019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 602150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Peek ahead in the compiled pattern, to the URX_LOOP_C that 6023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // must follow. It's operand is the stack location 6024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // that holds the starting input index for the match of this .* 602550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t loopcOp = (int32_t)pat[fp->fPatIdx]; 6026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C); 6027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stackLoc = URX_VAL(loopcOp); 6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize); 6029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fExtra[stackLoc] = fp->fInputIdx; 603050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #ifdef REGEX_SMART_BACKTRACKING 603150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = fp->fInputIdx; 603250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho #endif 6033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fInputIdx = ix; 603450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save State to the URX_LOOP_C op that follows this one, 6036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that match failures in the following code 
will return to there. 6037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. 6038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx, status); 6039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fp->fPatIdx++; 6040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 604250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 604350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case URX_LOOP_C: 6045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 6046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru U_ASSERT(opValue>=0 && opValue<fFrameSize); 604750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho backSearchIndex = (int32_t)fp->fExtra[opValue]; 604850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_ASSERT(backSearchIndex <= fp->fInputIdx); 604950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (backSearchIndex == fp->fInputIdx) { 6050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've backed up the input idx to the point that the loop started. 6051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The loop is done. Leave here without saving state. 6052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Subsequent failures won't come back here. 
6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set up for the next iteration of the loop, with input index 6056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // backed up by one from the last time through, 6057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and a state save to this instruction in case the following code fails again. 6058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (We're going backwards because this loop emulates stack unwinding, not 6059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the initial scan forward.) 6060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(fp->fInputIdx > 0); 606150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 prevC; 606250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit? 606350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 606450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (prevC == 0x0a && 606550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fp->fInputIdx > backSearchIndex && 6066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru inputBuf[fp->fInputIdx-1] == 0x0d) { 606750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t prevOp = (int32_t)pat[fp->fPatIdx-2]; 6068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) { 6069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // .*, stepping back over CRLF pair. 
607050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_BACK_1(inputBuf, 0, fp->fInputIdx); 6071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 607350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 607450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fp = StateSave(fp, fp->fPatIdx-1, status); 6076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 607850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 607950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 608050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 6082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Trouble. The compiled pattern contains an entry with an 6083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unrecognized type tag. 
6084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(FALSE); 6085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 608650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 6088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru isMatch = FALSE; 6089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 6090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerubreakFromLoop: 6094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatch = isMatch; 6095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isMatch) { 6096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fLastMatchEnd = fMatchEnd; 6097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatchStart = startIdx; 6098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fMatchEnd = fp->fInputIdx; 6099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fTraceDebug) { 6100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_RUN_DEBUG_PRINTF(("Match. 
start=%d end=%d\n\n", fMatchStart, fMatchEnd)); 6101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 6104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 6105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fTraceDebug) { 6106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru REGEX_RUN_DEBUG_PRINTF(("No match\n\n")); 6107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 6108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 610950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 6110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fFrame = fp; // The active stack frame when the engine stopped. 611150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Contains the capture group results that we need to 611250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // access later. 6113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 6115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 6116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher) 6119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 6121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS 6123