1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**************************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2002-2011 International Business Machines Corporation  *
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   and others. All rights reserved.                                     *
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
8c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//  file:  rematch.cpp
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//         Contains the implementation of class RegexMatcher,
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//         which is one of the main API classes for the ICU regular expression package.
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h"
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h"
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "regextxt.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "ucase.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #include <malloc.h>        // Needed for heapcheck testing
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
// Find progress callback
// ----------------------
// Macro to inline the test for, and the call to, ReportFindProgress().
//   When no find-progress callback is installed the && short-circuits and
//   the function call is skipped entirely.
//
//   Evaluates to true only when a callback is installed AND
//   ReportFindProgress(pos, status) returned FALSE.
//
//   The whole expansion is wrapped in parentheses so that it behaves as a
//   single operand at the point of use (for example under a leading '!').
//
#define REGEXFINDPROGRESS_INTERRUPT(pos, status)     \
    ((fFindProgressCallbackFn != NULL) && (ReportFindProgress(pos, status) == FALSE))
4127f654740f2a26ad62a5c155af9199af9e69b889claireho
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Smart Backtracking
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// ------------------
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// When a failure would go back to a LOOP_C instruction,
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// strings, characters, and setrefs scan backwards for a valid start
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// character themselves, pop the stack, and save state, emulating the
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// LOOP_C's effect but assured that the next character of input is a
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// possible matching character.
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Good idea in theory; unfortunately it only helps out a few specific
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// cases and slows the engine down a little in the rest.
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//#define REGEX_SMART_BACKTRACKING 1
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Default limit for the size of the backtrack stack, to avoid system
//    failures caused by heap exhaustion.  Units are 32 bit words, not bytes.
// This value puts ICU's limit higher than most other regexp implementations,
//    which use recursion rather than the heap, and take more storage per
//    backtrack point.
//
static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8 * 1000 * 1000;
65c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
// Time limit counter constant.
//   Time limits for expression evaluation are expressed in quanta of work
//   done by the engine, each quantum being 10,000 state saves.
//   This constant is that number of state saves per tick.
static const int32_t TIMER_INITIAL_VALUE = 10 * 1000;
71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-----------------------------------------------------------------------------
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Constructor and Destructor
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-----------------------------------------------------------------------------
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const RegexPattern *pat)  {
78c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fDeferredStatus = U_ZERO_ERROR;
79c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(fDeferredStatus);
80c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
81c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
82c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat==NULL) {
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
87c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern = pat;
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus);
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           uint32_t flags, UErrorCode &status) {
95c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(status);
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UParseError    pe;
100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = fPatternOwned;
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText inputText = UTEXT_INITIALIZER;
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&inputText, &input, &status);
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(&inputText, status);
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = TRUE;
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText *regexp, UText *input,
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           uint32_t flags, UErrorCode &status) {
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init(status);
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError    pe;
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPattern           = fPatternOwned;
125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init2(input, status);
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString &regexp,
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           uint32_t flags, UErrorCode &status) {
131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(status);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UParseError    pe;
136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPattern           = fPatternOwned;
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText *regexp,
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           uint32_t flags, UErrorCode &status) {
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init(status);
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError    pe;
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = fPatternOwned;
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
162c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::~RegexMatcher() {
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fStack;
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fData != fSmallData) {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(fData);
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fData = NULL;
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fPatternOwned) {
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete fPatternOwned;
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPatternOwned = NULL;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPattern = NULL;
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInput) {
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fInput;
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputText) {
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(fInputText);
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fAltInputText) {
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(fAltInputText);
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #if UCONFIG_NO_BREAK_ITERATION==0
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fWordBreakItr;
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #endif
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//   init()   common initialization for use by all constructors.
192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            Initialize all fields, get the object into a consistent state.
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            This must be done even when the initial status shows an error,
194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            so that the object is initialized sufficiently well for the destructor
195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            to run safely.
196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexMatcher::init(UErrorCode &status) {
198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = NULL;
199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = NULL;
200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrameSize         = 0;
201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRegionStart       = 0;
202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRegionLimit       = 0;
203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchorStart       = 0;
204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchorLimit       = 0;
205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLookStart         = 0;
206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLookLimit         = 0;
207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fActiveStart       = 0;
208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fActiveLimit       = 0;
209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTransparentBounds = FALSE;
210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchoringBounds   = TRUE;
211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatch             = FALSE;
212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatchStart        = 0;
213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatchEnd          = 0;
214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLastMatchEnd      = -1;
215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAppendPosition    = 0;
216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fHitEnd            = FALSE;
217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRequireEnd        = FALSE;
218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fStack             = NULL;
219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrame             = NULL;
220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTimeLimit         = 0;
221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTime              = 0;
222c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTickCounter       = 0;
223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fStackLimit        = DEFAULT_BACKTRACK_STACK_CAPACITY;
224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fCallbackFn        = NULL;
225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fCallbackContext   = NULL;
22627f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackFn      = NULL;
22727f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackContext = NULL;
228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTraceDebug        = FALSE;
229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fDeferredStatus    = status;
230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fData              = fSmallData;
231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fWordBreakItr      = NULL;
232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fStack             = new UVector64(status);
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputText         = NULL;
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAltInputText      = NULL;
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInput             = NULL;
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputLength       = 0;
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = FALSE;
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
244c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
245c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//  init2()   Common initialization for use by RegexMatcher constructors, part 2.
247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            This handles the common setup to be done after the Pattern is available.
248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::init2(UText *input, UErrorCode &status) {
250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) {
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t));
257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (fData == NULL) {
258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    reset(input);
264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status);
265c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
266c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar BACKSLASH  = 0x5c;
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar DOLLARSIGN = 0x24;
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    appendReplacement
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest,
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              const UnicodeString &replacement,
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UErrorCode &status) {
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(status)) {
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText resultText = UTEXT_INITIALIZER;
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &dest, &status);
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_SUCCESS(status)) {
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            appendReplacement(&resultText, &replacementText, status);
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_close(&resultText);
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replacementText);
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    appendReplacement, UText mode
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::appendReplacement(UText *dest,
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                              UText *replacement,
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                              UErrorCode &status) {
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch == FALSE) {
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_REGEX_INVALID_STATE;
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy input string from the end of previous match to start of current match
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t  destLen = utext_nativeLength(dest);
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatchStart > fAppendPosition) {
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
32127f654740f2a26ad62a5c155af9199af9e69b889claireho            destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
32227f654740f2a26ad62a5c155af9199af9e69b889claireho                                     (int32_t)(fMatchStart-fAppendPosition), &status);
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t len16;
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = (int32_t)(fMatchStart-fAppendPosition);
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus);
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
33227f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
33327f654740f2a26ad62a5c155af9199af9e69b889claireho                status = U_MEMORY_ALLOCATION_ERROR;
33427f654740f2a26ad62a5c155af9199af9e69b889claireho                return *this;
33527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status);
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status);
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_free(inputChars);
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAppendPosition = fMatchEnd;
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // scan the replacement text, looking for substitutions ($n) and \escapes.
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  TODO:  optimize this loop by efficiently scanning for '$' or '\',
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //         move entire ranges not containing substitutions.
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTEXT_SETNATIVEINDEX(replacement, 0);
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 c = UTEXT_NEXT32(replacement);
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while (c != U_SENTINEL) {
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c == BACKSLASH) {
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Backslash Escape.  Copy the following char out without further checks.
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                    Note:  Surrogate pairs don't need any special handling
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           The second half wont be a '$' or a '\', and
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           will move to the dest normally on the next
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           loop iteration.
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_CURRENT32(replacement);
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == U_SENTINEL) {
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c==0x55/*U*/ || c==0x75/*u*/) {
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We have a \udddd or \Udddddddd escape sequence.
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t offset = 0;
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement);
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (escapedChar != (UChar32)0xFFFFFFFF) {
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_IS_BMP(escapedChar)) {
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar c16 = (UChar)escapedChar;
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar surrogate[2];
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        surrogate[0] = U16_LEAD(escapedChar);
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        surrogate[1] = U16_TRAIL(escapedChar);
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (U_SUCCESS(status)) {
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // TODO:  Report errors for mal-formed \u escapes?
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //        As this is, the original sequence is output, which may be OK.
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (context.lastOffset == offset) {
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(replacement);
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else if (context.lastOffset != offset-1) {
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_moveIndex32(replacement, offset - context.lastOffset - 1);
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_NEXT32(replacement);
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Plain backslash escape.  Just put out the escaped character.
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_IS_BMP(c)) {
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar c16 = (UChar)c;
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar surrogate[2];
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    surrogate[0] = U16_LEAD(c);
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    surrogate[1] = U16_TRAIL(c);
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_SUCCESS(status)) {
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (c != DOLLARSIGN) {
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Normal char, not a $.  Copy it out without further checks.
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_IS_BMP(c)) {
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c16 = (UChar)c;
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar surrogate[2];
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                surrogate[0] = U16_LEAD(c);
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                surrogate[1] = U16_TRAIL(c);
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_SUCCESS(status)) {
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We've got a $.  Pick up a capture group number if one follows.
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Consume at most the number of digits necessary for the largest capture
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // number that is valid for this pattern.
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t numDigits = 0;
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t groupNum  = 0;
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar32 digitC;
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                digitC = UTEXT_CURRENT32(replacement);
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (digitC == U_SENTINEL) {
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (u_isdigit(digitC) == FALSE) {
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_NEXT32(replacement);
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                groupNum=groupNum*10 + u_charDigitValue(digitC);
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                numDigits++;
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (numDigits >= fPattern->fMaxCaptureDigits) {
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (numDigits == 0) {
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The $ didn't introduce a group number at all.
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Treat it as just part of the substitution text.
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c16 = DOLLARSIGN;
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Finally, append the capture group data to the destination.
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                destLen += appendGroup(groupNum, dest, status);
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_FAILURE(status)) {
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Can fail if group number is out of range.
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(replacement);
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    appendTail     Intended to be used in conjunction with appendReplacement()
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                   To the destination string, append everything following
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                   the last match position from the input string.
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
472c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                   Note:  Match ranges do not affect appendTail or appendReplacement
473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &dest, &status);
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(status)) {
48127f654740f2a26ad62a5c155af9199af9e69b889claireho        appendTail(&resultText, status);
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&resultText);
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   appendTail, UText mode
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
49127f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) {
49227f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool bailOut = FALSE;
49327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
49427f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
49527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
49627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
49727f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
49827f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
49927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
50027f654740f2a26ad62a5c155af9199af9e69b889claireho
50127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (bailOut) {
50227f654740f2a26ad62a5c155af9199af9e69b889claireho        //  dest must not be NULL
50327f654740f2a26ad62a5c155af9199af9e69b889claireho        if (dest) {
50427f654740f2a26ad62a5c155af9199af9e69b889claireho            utext_replace(dest, utext_nativeLength(dest), utext_nativeLength(dest), NULL, 0, &status);
50527f654740f2a26ad62a5c155af9199af9e69b889claireho            return dest;
50627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
50727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
50827f654740f2a26ad62a5c155af9199af9e69b889claireho
50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputLength > fAppendPosition) {
51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int64_t destLen = utext_nativeLength(dest);
51227f654740f2a26ad62a5c155af9199af9e69b889claireho            utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
51327f654740f2a26ad62a5c155af9199af9e69b889claireho                          (int32_t)(fInputLength-fAppendPosition), &status);
51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t len16;
51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = (int32_t)(fInputLength-fAppendPosition);
51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status);
52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                status = U_ZERO_ERROR; // buffer overflow
52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16));
52427f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
52527f654740f2a26ad62a5c155af9199af9e69b889claireho                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
52627f654740f2a26ad62a5c155af9199af9e69b889claireho            } else {
52727f654740f2a26ad62a5c155af9199af9e69b889claireho                utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated
52827f654740f2a26ad62a5c155af9199af9e69b889claireho                int64_t destLen = utext_nativeLength(dest);
52927f654740f2a26ad62a5c155af9199af9e69b889claireho                utext_replace(dest, destLen, destLen, inputChars, len16, &status);
53027f654740f2a26ad62a5c155af9199af9e69b889claireho                uprv_free(inputChars);
53127f654740f2a26ad62a5c155af9199af9e69b889claireho            }
53250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   end
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexMatcher::end(UErrorCode &err) const {
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return end(0, err);
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54827f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(UErrorCode &err) const {
54927f654740f2a26ad62a5c155af9199af9e69b889claireho    return end64(0, err);
55027f654740f2a26ad62a5c155af9199af9e69b889claireho}
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55227f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const {
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch == FALSE) {
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_REGEX_INVALID_STATE;
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (group < 0 || group > fPattern->fGroupMap->size()) {
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_INDEX_OUTOFBOUNDS_ERROR;
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t e = -1;
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (group == 0) {
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        e = fMatchEnd;
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Get the position within the stack frame of the variables for
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    this capture group.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(groupOffset < fPattern->fFrameSize);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(groupOffset >= 0);
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        e = fFrame->fExtra[groupOffset + 1];
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
57627f654740f2a26ad62a5c155af9199af9e69b889claireho        return e;
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
57927f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
58027f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)end64(group, err);
58127f654740f2a26ad62a5c155af9199af9e69b889claireho}
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   find()
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexMatcher::find() {
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Start at the position of the last match end.  (Will be zero if the
59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matcher has been reset.)
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return findUsingChunk();
59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t startPos = fMatchEnd;
602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (startPos==0) {
603c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        startPos = fActiveStart;
604c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch) {
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Save the position of any previous successful match.
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fLastMatchEnd = fMatchEnd;
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fMatchStart == fMatchEnd) {
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Previous match had zero length.  Move start position up one position
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  to avoid sending find() into a loop on zero-length matches.
613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (startPos >= fActiveLimit) {
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fMatch = FALSE;
615c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
61850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
619b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (void)UTEXT_NEXT32(fInputText);
62050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            startPos = UTEXT_GETNATIVEINDEX(fInputText);
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fLastMatchEnd >= 0) {
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // A previous find() failed to match.  Don't try again.
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   (without this test, a pattern with a zero-length match
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    could match again at the end of an input string.)
627c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fHitEnd = TRUE;
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compute the position in the input string beyond which a match can not begin, because
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   the minimum length match would extend past the end of the input.
635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //          Be aware of possible overflows if making changes here.
63750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t testStartLimit;
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_USES_U16(fInputText)) {
63950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testStartLimit = fActiveLimit - fPattern->fMinMatchLen;
64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos > testStartLimit) {
64150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fMatch = FALSE;
64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fHitEnd = TRUE;
64350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
64450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
64550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
64650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // For now, let the matcher discover that it can't match on its own
64750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // We don't know how long the match len is in native characters
64850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testStartLimit = fActiveLimit;
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32  c;
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ASSERT(startPos >= 0);
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (fPattern->fStartType) {
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_NO_INFO:
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // No optimization was found.
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Try a match at each input position.
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            MatchAt(startPos, FALSE, fDeferredStatus);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(fDeferredStatus)) {
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fMatch) {
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
66650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= testStartLimit) {
667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
67050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
671b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (void)UTEXT_NEXT32(fInputText);
67250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            startPos = UTEXT_GETNATIVEINDEX(fInputText);
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note that it's perfectly OK for a pattern to have a zero-length
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   match at the end of a string, so we must make sure that the loop
67550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   runs with startPos == testStartLimit the last time through.
67627f654740f2a26ad62a5c155af9199af9e69b889claireho            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
67727f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_START:
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Matches are only possible at the start of the input string
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   (pattern begins with ^ or \A)
684c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (startPos > fActiveStart) {
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fMatch = FALSE;
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        MatchAt(startPos, FALSE, fDeferredStatus);
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(fDeferredStatus)) {
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return fMatch;
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_SET:
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Match may start on any char from a pre-computed set.
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fPattern->fMinMatchLen > 0);
69950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int64_t pos;
70050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (;;) {
70250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
70350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                pos = UTEXT_GETNATIVEINDEX(fInputText);
70450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c will be -1 (U_SENTINEL) at end of text, in which case we
70550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // skip this next block (so we don't have a negative array index)
70650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // and handle end of text in the following block.
70727f654740f2a26ad62a5c155af9199af9e69b889claireho                if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) ||
70827f654740f2a26ad62a5c155af9199af9e69b889claireho                              (c>=256 && fPattern->fInitialChars->contains(c)))) {
70950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    MatchAt(startPos, FALSE, fDeferredStatus);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(fDeferredStatus)) {
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return FALSE;
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (fMatch) {
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return TRUE;
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, pos);
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testStartLimit) {
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fMatch = FALSE;
720c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                startPos = pos;
72427f654740f2a26ad62a5c155af9199af9e69b889claireho	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
72527f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_STRING:
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_CHAR:
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Match starts on exactly one char.
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fPattern->fMinMatchLen > 0);
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 theChar = fPattern->fInitialChar;
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int64_t pos;
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (;;) {
73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                pos = UTEXT_GETNATIVEINDEX(fInputText);
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c == theChar) {
74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    MatchAt(startPos, FALSE, fDeferredStatus);
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(fDeferredStatus)) {
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return FALSE;
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (fMatch) {
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return TRUE;
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, pos);
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
75150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testStartLimit) {
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fMatch = FALSE;
753c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                startPos = pos;
75727f654740f2a26ad62a5c155af9199af9e69b889claireho	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
75827f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_LINE:
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32  c;
766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (startPos == fAnchorStart) {
767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                MatchAt(startPos, FALSE, fDeferredStatus);
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(fDeferredStatus)) {
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (fMatch) {
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
77450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
77550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                startPos = UTEXT_GETNATIVEINDEX(fInputText);
77750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
77850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_PREVIOUS32(fInputText);
78050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fPattern->fFlags & UREGEX_UNIX_LINES) {
78450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
785c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (c == 0x0a) {
786c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            MatchAt(startPos, FALSE, fDeferredStatus);
787c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (U_FAILURE(fDeferredStatus)) {
788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return FALSE;
789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
790c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (fMatch) {
791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return TRUE;
792c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
79350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (startPos >= testStartLimit) {
796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fMatch = FALSE;
797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return FALSE;
799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
80050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
80150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Note that it's perfectly OK for a pattern to have a zero-length
803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   match at the end of a string, so we must make sure that the loop
80450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   runs with startPos == testStartLimit the last time through.
80527f654740f2a26ad62a5c155af9199af9e69b889claireho		            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
80627f654740f2a26ad62a5c155af9199af9e69b889claireho                        return FALSE;
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
809c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (;;) {
810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
81250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
813b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                (void)UTEXT_NEXT32(fInputText);
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                startPos = UTEXT_GETNATIVEINDEX(fInputText);
815c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            MatchAt(startPos, FALSE, fDeferredStatus);
817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (U_FAILURE(fDeferredStatus)) {
818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return FALSE;
819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (fMatch) {
821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return TRUE;
822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (startPos >= testStartLimit) {
826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fMatch = FALSE;
827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return FALSE;
829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
83050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
83150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Note that it's perfectly OK for a pattern to have a zero-length
833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   match at the end of a string, so we must make sure that the loop
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   runs with startPos == testStartLimit the last time through.
83527f654740f2a26ad62a5c155af9199af9e69b889claireho		            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
83627f654740f2a26ad62a5c155af9199af9e69b889claireho                        return FALSE;
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ASSERT(FALSE);
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
85127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::find(int64_t start, UErrorCode &status) {
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    this->reset();                        // Note:  Reset() is specified by Java Matcher documentation.
860c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                          //        This will reset the region to be the full input length.
86150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
86250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
86450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
86550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
86627f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeStart = start;
86727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
87150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatchEnd = nativeStart;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return find();
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
87850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   findUsingChunk() -- like find(), but with the advance knowledge that the
87950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       entire string is available in the UText's chunk buffer.
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
88250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::findUsingChunk() {
88350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Start at the position of the last match end.  (Will be zero if the
88450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matcher has been reset.
88550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
88750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t startPos = (int32_t)fMatchEnd;
88850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (startPos==0) {
88950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        startPos = (int32_t)fActiveStart;
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
89150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
89250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *inputBuf = fInputText->chunkContents;
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
89450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch) {
89550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Save the position of any previous successful match.
89650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLastMatchEnd = fMatchEnd;
89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
89850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fMatchStart == fMatchEnd) {
89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Previous match had zero length.  Move start position up one position
90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  to avoid sending find() into a loop on zero-length matches.
90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= fActiveLimit) {
90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
90350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
90450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
90550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
90650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fInputLength);
90750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
90850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
90950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fLastMatchEnd >= 0) {
91050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // A previous find() failed to match.  Don't try again.
91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   (without this test, a pattern with a zero-length match
91250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    could match again at the end of an input string.)
91350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fHitEnd = TRUE;
91450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
91550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
91850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
91950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compute the position in the input string beyond which a match can not begin, because
92050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   the minimum length match would extend past the end of the input.
92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //          Be aware of possible overflows if making changes here.
92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t testLen  = (int32_t)(fActiveLimit - fPattern->fMinMatchLen);
92450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (startPos > testLen) {
92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatch = FALSE;
92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
92950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32  c;
93150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(startPos >= 0);
93250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    switch (fPattern->fStartType) {
93450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_NO_INFO:
93550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // No optimization was found.
93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Try a match at each input position.
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            MatchChunkAt(startPos, FALSE, fDeferredStatus);
93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(fDeferredStatus)) {
94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fMatch) {
94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= testLen) {
94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fActiveLimit);
95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note that it's perfectly OK for a pattern to have a zero-length
95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   match at the end of a string, so we must make sure that the loop
95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   runs with startPos == testLen the last time through.
95327f654740f2a26ad62a5c155af9199af9e69b889claireho            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
95427f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
95550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
95650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_START:
95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Matches are only possible at the start of the input string
96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   (pattern begins with ^ or \A)
96150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos > fActiveStart) {
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fMatch = FALSE;
96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt(startPos, FALSE, fDeferredStatus);
96650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(fDeferredStatus)) {
96750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
96850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return fMatch;
97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
97150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_SET:
97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
97450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match may start on any char from a pre-computed set.
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(fPattern->fMinMatchLen > 0);
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
97750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t pos = startPos;
97850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
97927f654740f2a26ad62a5c155af9199af9e69b889claireho            if ((c<256 && fPattern->fInitialChars8->contains(c)) ||
98027f654740f2a26ad62a5c155af9199af9e69b889claireho                (c>=256 && fPattern->fInitialChars->contains(c))) {
98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                MatchChunkAt(pos, FALSE, fDeferredStatus);
98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_FAILURE(fDeferredStatus)) {
98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fMatch) {
98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;
98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (pos >= testLen) {
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
99250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
99350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
99427f654740f2a26ad62a5c155af9199af9e69b889claireho            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
99527f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
99650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_STRING:
100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_CHAR:
100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match starts on exactly one char.
100450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(fPattern->fMinMatchLen > 0);
100550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 theChar = fPattern->fInitialChar;
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t pos = startPos;
100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
100950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == theChar) {
101050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                MatchChunkAt(pos, FALSE, fDeferredStatus);
101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_FAILURE(fDeferredStatus)) {
101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fMatch) {
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;
101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
101850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (pos >= testLen) {
101950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
102050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
102150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
102327f654740f2a26ad62a5c155af9199af9e69b889claireho            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
102427f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
102850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_LINE:
103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32  c;
103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos == fAnchorStart) {
103350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            MatchChunkAt(startPos, FALSE, fDeferredStatus);
103450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(fDeferredStatus)) {
103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
103650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fMatch) {
103850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fActiveLimit);
104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fPattern->fFlags & UREGEX_UNIX_LINES) {
104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = inputBuf[startPos-1];
104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a) {
104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_FAILURE(fDeferredStatus)) {
104950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
105050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fMatch) {
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return TRUE;
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testLen) {
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fMatch = FALSE;
105750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_FWD_1(inputBuf, startPos, fActiveLimit);
106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Note that it's perfectly OK for a pattern to have a zero-length
106250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   match at the end of a string, so we must make sure that the loop
106350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   runs with startPos == testLen the last time through.
106427f654740f2a26ad62a5c155af9199af9e69b889claireho	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
106527f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
106650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
106750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = inputBuf[startPos-1];
107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029 )) {
107250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
107350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        startPos++;
107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    MatchChunkAt(startPos, FALSE, fDeferredStatus);
107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_FAILURE(fDeferredStatus)) {
107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fMatch) {
108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return TRUE;
108150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testLen) {
108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fMatch = FALSE;
108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
108650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
108750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_FWD_1(inputBuf, startPos, fActiveLimit);
108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Note that it's perfectly OK for a pattern to have a zero-length
109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   match at the end of a string, so we must make sure that the loop
109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   runs with startPos == testLen the last time through.
109227f654740f2a26ad62a5c155af9199af9e69b889claireho	            if  (REGEXFINDPROGRESS_INTERRUPT(startPos, fDeferredStatus))
109327f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    default:
109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(FALSE);
110350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FALSE;
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  group()
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
111350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(UErrorCode &status) const {
111450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return group(0, status);
111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
111650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
111727f654740f2a26ad62a5c155af9199af9e69b889claireho//  Return immutable shallow clone
111827f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const {
111927f654740f2a26ad62a5c155af9199af9e69b889claireho    return group(0, dest, group_len, status);
112050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
112150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
112227f654740f2a26ad62a5c155af9199af9e69b889claireho//  Return immutable shallow clone
112327f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const {
112427f654740f2a26ad62a5c155af9199af9e69b889claireho    group_len = 0;
112527f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool bailOut = FALSE;
112627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
112727f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
112827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
112927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
113027f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
113127f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
113227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
113327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fMatch == FALSE) {
113427f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_REGEX_INVALID_STATE;
113527f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
113627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
113727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
113827f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
113927f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
114027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
114127f654740f2a26ad62a5c155af9199af9e69b889claireho
114227f654740f2a26ad62a5c155af9199af9e69b889claireho    if (bailOut) {
114327f654740f2a26ad62a5c155af9199af9e69b889claireho        return (dest) ? dest : utext_openUChars(NULL, NULL, 0, &status);
114427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
114527f654740f2a26ad62a5c155af9199af9e69b889claireho
114627f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t s, e;
114727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (groupNum == 0) {
114827f654740f2a26ad62a5c155af9199af9e69b889claireho        s = fMatchStart;
114927f654740f2a26ad62a5c155af9199af9e69b889claireho        e = fMatchEnd;
115027f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
115127f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
115227f654740f2a26ad62a5c155af9199af9e69b889claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
115327f654740f2a26ad62a5c155af9199af9e69b889claireho        U_ASSERT(groupOffset >= 0);
115427f654740f2a26ad62a5c155af9199af9e69b889claireho        s = fFrame->fExtra[groupOffset];
115527f654740f2a26ad62a5c155af9199af9e69b889claireho        e = fFrame->fExtra[groupOffset+1];
115627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
115750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
115827f654740f2a26ad62a5c155af9199af9e69b889claireho    if (s < 0) {
115927f654740f2a26ad62a5c155af9199af9e69b889claireho        // A capture group wasn't part of the match
116027f654740f2a26ad62a5c155af9199af9e69b889claireho        return utext_clone(dest, fInputText, FALSE, TRUE, &status);
116127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
116227f654740f2a26ad62a5c155af9199af9e69b889claireho    U_ASSERT(s <= e);
116327f654740f2a26ad62a5c155af9199af9e69b889claireho    group_len = e - s;
116427f654740f2a26ad62a5c155af9199af9e69b889claireho
116527f654740f2a26ad62a5c155af9199af9e69b889claireho    dest = utext_clone(dest, fInputText, FALSE, TRUE, &status);
116627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (dest)
116727f654740f2a26ad62a5c155af9199af9e69b889claireho        UTEXT_SETNATIVEINDEX(dest, s);
116827f654740f2a26ad62a5c155af9199af9e69b889claireho    return dest;
116927f654740f2a26ad62a5c155af9199af9e69b889claireho}
117050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
117150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
117250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString result;
117327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
117427f654740f2a26ad62a5c155af9199af9e69b889claireho        return result;
117527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
117650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
117750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &result, &status);
117850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    group(groupNum, &resultText, status);
117950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&resultText);
118050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return result;
118150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
118250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
118350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
118427f654740f2a26ad62a5c155af9199af9e69b889claireho//  Return deep (mutable) clone
118527f654740f2a26ad62a5c155af9199af9e69b889claireho//		Technology Preview (as an API), but note that the UnicodeString API is implemented
118627f654740f2a26ad62a5c155af9199af9e69b889claireho//		using this function.
118750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, UErrorCode &status) const {
118850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool bailOut = FALSE;
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
119027f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
119450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bailOut = TRUE;
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
119650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
119750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch == FALSE) {
119850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_STATE;
119950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bailOut = TRUE;
120050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
120150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
120350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        bailOut = TRUE;
120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
120550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
120650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (bailOut) {
120750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (dest) {
120850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
120950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
121050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
121150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return utext_openUChars(NULL, NULL, 0, &status);
121250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
121350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
121450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
121550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t s, e;
121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum == 0) {
121750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fMatchStart;
121850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fMatchEnd;
121950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
122050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
122150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset >= 0);
122350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fFrame->fExtra[groupOffset];
122450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fFrame->fExtra[groupOffset+1];
122550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
122750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (s < 0) {
122850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // A capture group wasn't part of the match
122950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (dest) {
123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return utext_openUChars(NULL, NULL, 0, &status);
123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(s <= e);
123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(e <= fInputLength);
124050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (dest) {
124150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents+s, (int32_t)(e-s), &status);
124250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
124350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UText groupText = UTEXT_INITIALIZER;
124450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_openUChars(&groupText, fInputText->chunkContents+s, e-s, &status);
124550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
124650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_close(&groupText);
124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16;
125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_USES_U16(fInputText)) {
125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = (int32_t)(e-s);
125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode lengthStatus = U_ZERO_ERROR;
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
125727f654740f2a26ad62a5c155af9199af9e69b889claireho        if (groupChars == NULL) {
125827f654740f2a26ad62a5c155af9199af9e69b889claireho            status = U_MEMORY_ALLOCATION_ERROR;
125927f654740f2a26ad62a5c155af9199af9e69b889claireho            return dest;
126027f654740f2a26ad62a5c155af9199af9e69b889claireho        }
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fInputText, s, e, groupChars, len16+1, &status);
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (dest) {
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), groupChars, len16, &status);
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UText groupText = UTEXT_INITIALIZER;
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_openUChars(&groupText, groupChars, len16, &status);
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            dest = utext_clone(NULL, &groupText, TRUE, FALSE, &status);
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_close(&groupText);
127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uprv_free(groupChars);
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  appendGroup() -- currently internal only, appends a group to a UText rather
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                   than replacing its contents
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const {
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
128627f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
129027f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
129227f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t destLen = utext_nativeLength(dest);
129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch == FALSE) {
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_STATE;
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t s, e;
130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum == 0) {
130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fMatchStart;
130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fMatchEnd;
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset >= 0);
131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fFrame->fExtra[groupOffset];
131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fFrame->fExtra[groupOffset+1];
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (s < 0) {
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // A capture group wasn't part of the match
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(s <= e);
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t deltaLen;
132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(e <= fInputLength);
132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status);
132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16;
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_USES_U16(fInputText)) {
132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = (int32_t)(e-s);
132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode lengthStatus = U_ZERO_ERROR;
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
133427f654740f2a26ad62a5c155af9199af9e69b889claireho        if (groupChars == NULL) {
133527f654740f2a26ad62a5c155af9199af9e69b889claireho            status = U_MEMORY_ALLOCATION_ERROR;
133627f654740f2a26ad62a5c155af9199af9e69b889claireho            return 0;
133727f654740f2a26ad62a5c155af9199af9e69b889claireho        }
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fInputText, s, e, groupChars, len16+1, &status);
133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status);
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uprv_free(groupChars);
134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return deltaLen;
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1348c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  groupCount()
1351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::groupCount() const {
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fPattern->fGroupMap->size();
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hasAnchoringBounds()
1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasAnchoringBounds() const {
136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fAnchoringBounds;
1366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hasTransparentBounds()
1372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1373c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasTransparentBounds() const {
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fTransparentBounds;
1376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1378c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hitEnd()
1383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
138550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hitEnd() const {
138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fHitEnd;
1387c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  input()
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UnicodeString &RegexMatcher::input() const {
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!fInput) {
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16;
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_USES_U16(fInputText)) {
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = (int32_t)fInputLength;
140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status);
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR; // overflow, length status
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString *result = new UnicodeString(len16, 0, 0);
140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *inputChars = result->getBuffer(len16);
140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result->releaseBuffer(len16);
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator=
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *fInput;
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  inputText()
142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
142250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::inputText() const {
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fInputText;
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  getInput() -- like inputText(), but makes a clone or copies into another UText
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
143227f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const {
143327f654740f2a26ad62a5c155af9199af9e69b889claireho    UBool bailOut = FALSE;
143427f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
143527f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
143627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
143727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
143827f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
143927f654740f2a26ad62a5c155af9199af9e69b889claireho        bailOut = TRUE;
144027f654740f2a26ad62a5c155af9199af9e69b889claireho    }
144127f654740f2a26ad62a5c155af9199af9e69b889claireho
144227f654740f2a26ad62a5c155af9199af9e69b889claireho    if (bailOut) {
144327f654740f2a26ad62a5c155af9199af9e69b889claireho        if (dest) {
144427f654740f2a26ad62a5c155af9199af9e69b889claireho            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, &status);
144527f654740f2a26ad62a5c155af9199af9e69b889claireho            return dest;
144627f654740f2a26ad62a5c155af9199af9e69b889claireho        } else {
144727f654740f2a26ad62a5c155af9199af9e69b889claireho            return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
144827f654740f2a26ad62a5c155af9199af9e69b889claireho        }
144927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
145027f654740f2a26ad62a5c155af9199af9e69b889claireho
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest) {
145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status);
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t input16Len;
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                input16Len = (int32_t)fInputLength;
145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len));
146327f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
146427f654740f2a26ad62a5c155af9199af9e69b889claireho                return dest;
146527f654740f2a26ad62a5c155af9199af9e69b889claireho            }
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status);
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_free(inputChars);
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return dest;
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut);
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut) {
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool retVal = FALSE;
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  In the following test, we're really only interested in whether the UText should switch
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  between heap and stack allocation.  If length hasn't changed, we won't, so the chunkContents
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  will still point to the correct data.
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (utext_nativeLength(ut) != ut->nativeIndexingLimit) {
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString *us=(UnicodeString *)ut->context;
149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Update to the latest length.
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit).
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t newLength = us->length();
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Update the chunk description.
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The buffer may have switched between stack- and heap-based.
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkContents    = us->getBuffer();
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkLength      = newLength;
149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkNativeLimit = newLength;
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->nativeIndexingLimit = newLength;
150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = TRUE;
150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retVal;
150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  lookingAt()
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::lookingAt(UErrorCode &status) {
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else {
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resetPreserveRegion();
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)fActiveStart, FALSE, status);
153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(fActiveStart, FALSE, status);
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
153927f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) {
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t nativeStart;
156227f654740f2a26ad62a5c155af9199af9e69b889claireho    nativeStart = start;
156327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)nativeStart, FALSE, status);
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(nativeStart, FALSE, status);
157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  matches()
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::matches(UErrorCode &status) {
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else {
159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resetPreserveRegion();
160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)fActiveStart, TRUE, status);
160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(fActiveStart, TRUE, status);
160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::matches(int64_t start, UErrorCode &status) {
161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t nativeStart;
163427f654740f2a26ad62a5c155af9199af9e69b889claireho    nativeStart = start;
163527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)nativeStart, TRUE, status);
164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(nativeStart, TRUE, status);
164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
165250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    pattern
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
165550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst RegexPattern &RegexMatcher::pattern() const {
165650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *fPattern;
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
166150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
166350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    region
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
166550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
166627f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) {
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
166850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
167127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (regionStart>regionLimit || regionStart<0 || regionLimit<0) {
167250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
167450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
167527f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeStart = regionStart;
167627f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeLimit = regionLimit;
167727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart > fInputLength || nativeLimit > fInputLength) {
167827f654740f2a26ad62a5c155af9199af9e69b889claireho      status = U_ILLEGAL_ARGUMENT_ERROR;
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
168050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
168127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (startIndex == -1)
168227f654740f2a26ad62a5c155af9199af9e69b889claireho      this->reset();
168327f654740f2a26ad62a5c155af9199af9e69b889claireho    else
168427f654740f2a26ad62a5c155af9199af9e69b889claireho      resetPreserveRegion();
168527f654740f2a26ad62a5c155af9199af9e69b889claireho
168650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionStart = nativeStart;
168750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionLimit = nativeLimit;
168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveStart = nativeStart;
168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveLimit = nativeLimit;
169027f654740f2a26ad62a5c155af9199af9e69b889claireho
169127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (startIndex != -1) {
169227f654740f2a26ad62a5c155af9199af9e69b889claireho      if (startIndex < fActiveStart || startIndex > fActiveLimit) {
169327f654740f2a26ad62a5c155af9199af9e69b889claireho          status = U_INDEX_OUTOFBOUNDS_ERROR;
169427f654740f2a26ad62a5c155af9199af9e69b889claireho      }
169527f654740f2a26ad62a5c155af9199af9e69b889claireho      fMatchEnd = startIndex;
169627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
169727f654740f2a26ad62a5c155af9199af9e69b889claireho
169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!fTransparentBounds) {
169950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLookStart = nativeStart;
170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLookLimit = nativeLimit;
170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
170250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fAnchoringBounds) {
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fAnchorStart = nativeStart;
170450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fAnchorLimit = nativeLimit;
170550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
170927f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) {
171027f654740f2a26ad62a5c155af9199af9e69b889claireho  return region(start, limit, -1, status);
171127f654740f2a26ad62a5c155af9199af9e69b889claireho}
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
171550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    regionEnd
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
171850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionEnd() const {
171927f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)fRegionLimit;
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
172227f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionEnd64() const {
172327f654740f2a26ad62a5c155af9199af9e69b889claireho    return fRegionLimit;
172427f654740f2a26ad62a5c155af9199af9e69b889claireho}
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1727c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
172850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    regionStart
1729c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1730c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
173150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionStart() const {
173227f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)fRegionStart;
173327f654740f2a26ad62a5c155af9199af9e69b889claireho}
173427f654740f2a26ad62a5c155af9199af9e69b889claireho
173527f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionStart64() const {
173627f654740f2a26ad62a5c155af9199af9e69b889claireho    return fRegionStart;
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
174250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceAll
1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
174550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &status) {
174650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
174750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
174850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString resultString;
174927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
175027f654740f2a26ad62a5c155af9199af9e69b889claireho        return resultString;
175127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
175250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &resultString, &status);
175550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    replaceAll(&replacementText, &resultText, status);
1757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&resultText);
175950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replacementText);
176050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return resultString;
1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
176450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
176650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceAll, UText mode
1767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
176850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &status) {
1769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
177027f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = fDeferredStatus;
177427f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1775c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
177650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
177750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest == NULL) {
177850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString emptyString;
177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty = UTEXT_INITIALIZER;
178050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&empty, &emptyString, &status);
178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
1784c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
178650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(status)) {
178750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
178850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (find()) {
178950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            appendReplacement(dest, replacement, status);
179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(status)) {
179150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
179250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
179350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
179427f654740f2a26ad62a5c155af9199af9e69b889claireho        appendTail(dest, status);
179550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
179650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
179750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceFirst
1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
180650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) {
180750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
180850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
180950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString resultString;
181050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
181150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
181250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &resultString, &status);
181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    replaceFirst(&replacementText, &resultText, status);
181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&resultText);
181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replacementText);
181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return resultString;
1820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceFirst, UText mode
1824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
182550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) {
1826c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
182727f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = fDeferredStatus;
183127f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    reset();
183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!find()) {
183627f654740f2a26ad62a5c155af9199af9e69b889claireho        return getInput(dest, status);
183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest == NULL) {
184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString emptyString;
184150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty = UTEXT_INITIALIZER;
184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&empty, &emptyString, &status);
184450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
184550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
1846c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
184750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    appendReplacement(dest, replacement, status);
184927f654740f2a26ad62a5c155af9199af9e69b889claireho    appendTail(dest, status);
185050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1852c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1853c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1854c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1855c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1856c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
185750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     requireEnd
1858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1859c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
186050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::requireEnd() const {
186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fRequireEnd;
1862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1863c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     reset
1868c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
187050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset() {
187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionStart    = 0;
187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionLimit    = fInputLength;
187350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveStart    = 0;
187450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveLimit    = fInputLength;
187550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorStart    = 0;
187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorLimit    = fInputLength;
187750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookStart      = 0;
187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookLimit      = fInputLength;
187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    resetPreserveRegion();
188050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
188550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::resetPreserveRegion() {
188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatchStart     = 0;
188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatchEnd       = 0;
188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLastMatchEnd   = -1;
188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAppendPosition = 0;
189050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatch          = FALSE;
189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fHitEnd         = FALSE;
189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRequireEnd     = FALSE;
189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTime           = 0;
189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter    = TIMER_INITIAL_VALUE;
189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //resetStack(); // more expensive than it looks...
189650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
189750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(const UnicodeString &input) {
190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus);
190127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fPattern->fNeedsAltInput) {
190227f654740f2a26ad62a5c155af9199af9e69b889claireho        fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
190327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
190450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputLength = utext_nativeLength(fInputText);
190550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete fInput;
190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInput = NULL;
190950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Do the following for any UnicodeString.
191150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  This is for compatibility for those clients who modify the input string "live" during regex operations.
191250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = TRUE;
191350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
191450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fWordBreakItr != NULL) {
191550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
191650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
191750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fWordBreakItr->setText(fInputText, status);
191850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
191950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
192050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
192150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
192250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
192350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
192450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(UText *input) {
192550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputText != input) {
192650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus);
192750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
192850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
192950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
193050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fInput;
193150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInput = NULL;
193250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
193350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fWordBreakItr != NULL) {
193450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
193550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode status = U_ZERO_ERROR;
193650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fWordBreakItr->setText(input, status);
193750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
193850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
193950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
194050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
194150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = FALSE;
194250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
194350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
194450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
194550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
194650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*RegexMatcher &RegexMatcher::reset(const UChar *) {
194750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fDeferredStatus = U_INTERNAL_PROGRAM_ERROR;
194850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
194950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}*/
195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
195127f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
195250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
195350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
195550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();       // Reset also resets the region to be the entire string.
195650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
195727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (position < 0 || position > fActiveLimit) {
195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
195950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
196050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
196127f654740f2a26ad62a5c155af9199af9e69b889claireho    fMatchEnd = position;
196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
1963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
196527f654740f2a26ad62a5c155af9199af9e69b889claireho
19662e615e9896b12236afe0ff2695e8afc2ee73f961claireho//--------------------------------------------------------------------------------
19672e615e9896b12236afe0ff2695e8afc2ee73f961claireho//
19682e615e9896b12236afe0ff2695e8afc2ee73f961claireho//    refresh
19692e615e9896b12236afe0ff2695e8afc2ee73f961claireho//
19702e615e9896b12236afe0ff2695e8afc2ee73f961claireho//--------------------------------------------------------------------------------
19712e615e9896b12236afe0ff2695e8afc2ee73f961clairehoRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) {
19722e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (U_FAILURE(status)) {
19732e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19742e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19752e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (input == NULL) {
19762e615e9896b12236afe0ff2695e8afc2ee73f961claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
19772e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19782e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19792e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (utext_nativeLength(fInputText) != utext_nativeLength(input)) {
19802e615e9896b12236afe0ff2695e8afc2ee73f961claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
19812e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19822e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19832e615e9896b12236afe0ff2695e8afc2ee73f961claireho    int64_t  pos = utext_getNativeIndex(fInputText);
19842e615e9896b12236afe0ff2695e8afc2ee73f961claireho    //  Shallow read-only clone of the new UText into the existing input UText
19852e615e9896b12236afe0ff2695e8afc2ee73f961claireho    fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status);
19862e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (U_FAILURE(status)) {
19872e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19882e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19892e615e9896b12236afe0ff2695e8afc2ee73f961claireho    utext_setNativeIndex(fInputText, pos);
1990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
19912e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (fAltInputText != NULL) {
19922e615e9896b12236afe0ff2695e8afc2ee73f961claireho        pos = utext_getNativeIndex(fAltInputText);
19932e615e9896b12236afe0ff2695e8afc2ee73f961claireho        fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status);
19942e615e9896b12236afe0ff2695e8afc2ee73f961claireho        if (U_FAILURE(status)) {
19952e615e9896b12236afe0ff2695e8afc2ee73f961claireho            return *this;
19962e615e9896b12236afe0ff2695e8afc2ee73f961claireho        }
19972e615e9896b12236afe0ff2695e8afc2ee73f961claireho        utext_setNativeIndex(fAltInputText, pos);
19982e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19992e615e9896b12236afe0ff2695e8afc2ee73f961claireho    return *this;
20002e615e9896b12236afe0ff2695e8afc2ee73f961claireho}
2001b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
200250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
200350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
2005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
200650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    setTrace
2007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
2008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
200950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTrace(UBool state) {
201050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTraceDebug = state;
2011c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
2012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
2013c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
201450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
201550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
201750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   split
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
201950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
202050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t  RegexMatcher::split(const UnicodeString &input,
202150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString    dest[],
202250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t          destCapacity,
202350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status)
202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
202550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText inputText = UTEXT_INITIALIZER;
202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&inputText, &input, &status);
202727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
202827f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
202927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
203127f654740f2a26ad62a5c155af9199af9e69b889claireho    UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity);
203227f654740f2a26ad62a5c155af9199af9e69b889claireho    if (destText == NULL) {
203327f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_MEMORY_ALLOCATION_ERROR;
203427f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
203527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
203650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
203750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i < destCapacity; i++) {
203850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        destText[i] = utext_openUnicodeString(NULL, &dest[i], &status);
203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
204050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
204150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t fieldCount = split(&inputText, destText, destCapacity, status);
204250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
204350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i < destCapacity; i++) {
204450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(destText[i]);
204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
204727f654740f2a26ad62a5c155af9199af9e69b889claireho    uprv_free(destText);
204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
204950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fieldCount;
205050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   split, UText mode
205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
205550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t  RegexMatcher::split(UText *input,
205650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText           *dest[],
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t          destCapacity,
205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status)
205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
206050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
206150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Check arguements for validity
206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
206450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (destCapacity < 1) {
206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
207050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
207150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Reset for the input text
207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset(input);
207650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t   nextOutputStringStart = 0;
207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fActiveLimit == 0) {
207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Loop through the input text, searching for the delimiter pattern
208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t numCaptureGroups = fPattern->fGroupMap->size();
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; ; i++) {
208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (i>=destCapacity-1) {
208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // There is one or zero output string left.
208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Fill the last output string with whatever is left from the input, then exit the loop.
209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  ( i will be == destCapacity if we filled the output array while processing
209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    capture groups of the delimiter expression, in which case we will discard the
209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    last capture group saved in favor of the unprocessed remainder of the
209350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    input string.)
209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            i = destCapacity-1;
209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fActiveLimit > nextOutputStringStart) {
209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (dest[i]) {
209827f654740f2a26ad62a5c155af9199af9e69b889claireho                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
209927f654740f2a26ad62a5c155af9199af9e69b889claireho                                      input->chunkContents+nextOutputStringStart,
210027f654740f2a26ad62a5c155af9199af9e69b889claireho                                      (int32_t)(fActiveLimit-nextOutputStringStart), &status);
210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UText remainingText = UTEXT_INITIALIZER;
210327f654740f2a26ad62a5c155af9199af9e69b889claireho                        utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
210427f654740f2a26ad62a5c155af9199af9e69b889claireho                                         fActiveLimit-nextOutputStringStart, &status);
210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_close(&remainingText);
210750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
210850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
210950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode lengthStatus = U_ZERO_ERROR;
211027f654740f2a26ad62a5c155af9199af9e69b889claireho                    int32_t remaining16Length =
211127f654740f2a26ad62a5c155af9199af9e69b889claireho                        utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
211250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
211327f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (remainingChars == NULL) {
211427f654740f2a26ad62a5c155af9199af9e69b889claireho                        status = U_MEMORY_ALLOCATION_ERROR;
211527f654740f2a26ad62a5c155af9199af9e69b889claireho                        break;
211627f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
211727f654740f2a26ad62a5c155af9199af9e69b889claireho
211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
211950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (dest[i]) {
212050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
212250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UText remainingText = UTEXT_INITIALIZER;
212350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
212450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
212550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_close(&remainingText);
212650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    uprv_free(remainingChars);
212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (find()) {
213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We found another delimiter.  Move everything from where we started looking
213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  up until the start of the delimiter into the next output string.
213650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
213827f654740f2a26ad62a5c155af9199af9e69b889claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
213927f654740f2a26ad62a5c155af9199af9e69b889claireho                                  input->chunkContents+nextOutputStringStart,
214027f654740f2a26ad62a5c155af9199af9e69b889claireho                                  (int32_t)(fMatchStart-nextOutputStringStart), &status);
214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
214327f654740f2a26ad62a5c155af9199af9e69b889claireho                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
214427f654740f2a26ad62a5c155af9199af9e69b889claireho                                      fMatchStart-nextOutputStringStart, &status);
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
214750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
214850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
214950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus);
215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
215227f654740f2a26ad62a5c155af9199af9e69b889claireho                if (remainingChars == NULL) {
215327f654740f2a26ad62a5c155af9199af9e69b889claireho                    status = U_MEMORY_ALLOCATION_ERROR;
215427f654740f2a26ad62a5c155af9199af9e69b889claireho                    break;
215527f654740f2a26ad62a5c155af9199af9e69b889claireho                }
215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utext_extract(input, nextOutputStringStart, fMatchStart, remainingChars, remaining16Length+1, &status);
215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
216150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
216250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
216350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
216450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
216550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uprv_free(remainingChars);
216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            nextOutputStringStart = fMatchEnd;
216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // If the delimiter pattern has capturing parentheses, the captured
217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  text goes out into the next n destination strings.
217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t groupNum;
217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
2174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (i >= destCapacity-2) {
2175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Never fill the last available output string with capture group text.
2176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // It will filled with the last field, the remainder of the
2177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    //  unsplit input text.
217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                i++;
218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                dest[i] = group(groupNum, dest[i], status);
218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
218350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
218450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (nextOutputStringStart == fActiveLimit) {
2185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // The delimiter was at the end of the string.  We're done, but first
2186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // we output one last empty string, for the empty field following
2187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //   the delimiter at the end of input.
2188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (i+1 < destCapacity) {
2189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ++i;
2190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (dest[i] == NULL) {
2191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        dest[i] = utext_openUChars(NULL, NULL, 0, &status);
2192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
2193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        static UChar emptyString[] = {(UChar)0};
2194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status);
2195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
219650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
2198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
2199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
220050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
220150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        else
220250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
220350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We ran off the end of the input while looking for the next delimiter.
220450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // All the remaining text goes into the current output string.
220550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
220727f654740f2a26ad62a5c155af9199af9e69b889claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
220827f654740f2a26ad62a5c155af9199af9e69b889claireho                                  input->chunkContents+nextOutputStringStart,
220927f654740f2a26ad62a5c155af9199af9e69b889claireho                                  (int32_t)(fActiveLimit-nextOutputStringStart), &status);
221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
221227f654740f2a26ad62a5c155af9199af9e69b889claireho                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
221327f654740f2a26ad62a5c155af9199af9e69b889claireho                                     fActiveLimit-nextOutputStringStart, &status);
221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
221750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
221850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
221950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
222127f654740f2a26ad62a5c155af9199af9e69b889claireho                if (remainingChars == NULL) {
222227f654740f2a26ad62a5c155af9199af9e69b889claireho                    status = U_MEMORY_ALLOCATION_ERROR;
222327f654740f2a26ad62a5c155af9199af9e69b889claireho                    break;
222427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
223150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
223250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
223350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
223450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
223550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uprv_free(remainingChars);
223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
224027f654740f2a26ad62a5c155af9199af9e69b889claireho        if (U_FAILURE(status)) {
224127f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
224227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
224327f654740f2a26ad62a5c155af9199af9e69b889claireho    }   // end of for loop
224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return i+1;
224550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
225050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     start
225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
225250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
225350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::start(UErrorCode &status) const {
225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return start(0, status);
225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
225727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(UErrorCode &status) const {
225827f654740f2a26ad62a5c155af9199af9e69b889claireho    return start64(0, status);
225927f654740f2a26ad62a5c155af9199af9e69b889claireho}
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
226350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     start(int32_t group, UErrorCode &status)
226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
226627f654740f2a26ad62a5c155af9199af9e69b889claireho
226727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const {
226850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
226950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch == FALSE) {
227650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_STATE;
227750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (group < 0 || group > fPattern->fGroupMap->size()) {
228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t s;
228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (group == 0) {
228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fMatchStart;
228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset >= 0);
229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fFrame->fExtra[groupOffset];
229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229327f654740f2a26ad62a5c155af9199af9e69b889claireho    return s;
229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
229550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229727f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
229827f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)start64(group, status);
229927f654740f2a26ad62a5c155af9199af9e69b889claireho}
230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
230250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
230350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     useAnchoringBounds
230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
230650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) {
230750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchoringBounds = b;
230850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorStart = (fAnchoringBounds ? fRegionStart : 0);
230950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorLimit = (fAnchoringBounds ? fRegionLimit : fInputLength);
231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
231650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     useTransparentBounds
231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
231950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) {
232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTransparentBounds = b;
232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookStart = (fTransparentBounds ? 0 : fRegionStart);
232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookLimit = (fTransparentBounds ? fInputLength : fRegionLimit);
232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setTimeLimit
232950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
233150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) {
233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit < 0) {
234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
234150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTimeLimit = limit;
234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getTimeLimit
235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
235250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getTimeLimit() const {
235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fTimeLimit;
235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
235950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setStackLimit
236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
236250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) {
236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit < 0) {
237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
237250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Reset the matcher.  This is needed here in case there is a current match
237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    whose final stack frame (containing the match results, pointed to by fFrame)
237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    would be lost by resizing to a smaller stack size.
237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit == 0) {
238150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Unlimited stack expansion
238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fStack->setMaxCapacity(0);
238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
238450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Change the units of the limit  from bytes to ints, and bump the size up
238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   to be big enough to hold at least one stack frame for the pattern,
238650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   if it isn't there already.
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t adjustedLimit = limit / sizeof(int32_t);
238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (adjustedLimit < fPattern->fFrameSize) {
238950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            adjustedLimit = fPattern->fFrameSize;
239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fStack->setMaxCapacity(adjustedLimit);
239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fStackLimit = limit;
239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
239550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getStackLimit
240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
240150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
240250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getStackLimit() const {
240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fStackLimit;
240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setMatchCallback
241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
241250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setMatchCallback(URegexMatchCallback     *callback,
241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    const void              *context,
241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    UErrorCode              &status) {
241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fCallbackFn = callback;
241950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fCallbackContext = context;
242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
242250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
242450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
242550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getMatchCallback
242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
242850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::getMatchCallback(URegexMatchCallback   *&callback,
242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  const void              *&context,
243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  UErrorCode              &status) {
243150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       return;
243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callback = fCallbackFn;
243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    context  = fCallbackContext;
243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
243850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
243927f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
244027f654740f2a26ad62a5c155af9199af9e69b889claireho//
//     setFindProgressCallback
244227f654740f2a26ad62a5c155af9199af9e69b889claireho//
244327f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
244427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback      *callback,
244527f654740f2a26ad62a5c155af9199af9e69b889claireho                                                const void                      *context,
244627f654740f2a26ad62a5c155af9199af9e69b889claireho                                                UErrorCode                      &status) {
244727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
244827f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
244927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
245027f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackFn = callback;
245127f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackContext = context;
245227f654740f2a26ad62a5c155af9199af9e69b889claireho}
245327f654740f2a26ad62a5c155af9199af9e69b889claireho
245427f654740f2a26ad62a5c155af9199af9e69b889claireho
245527f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
245627f654740f2a26ad62a5c155af9199af9e69b889claireho//
//     getFindProgressCallback
245827f654740f2a26ad62a5c155af9199af9e69b889claireho//
245927f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
246027f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback    *&callback,
246127f654740f2a26ad62a5c155af9199af9e69b889claireho                                                const void                    *&context,
246227f654740f2a26ad62a5c155af9199af9e69b889claireho                                                UErrorCode                    &status) {
246327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
246427f654740f2a26ad62a5c155af9199af9e69b889claireho       return;
246527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
246627f654740f2a26ad62a5c155af9199af9e69b889claireho    callback = fFindProgressCallbackFn;
246727f654740f2a26ad62a5c155af9199af9e69b889claireho    context  = fFindProgressCallbackContext;
246827f654740f2a26ad62a5c155af9199af9e69b889claireho}
246927f654740f2a26ad62a5c155af9199af9e69b889claireho
247027f654740f2a26ad62a5c155af9199af9e69b889claireho
247150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================
247250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
247350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Code following this point in this file is the internal
247450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Match Engine Implementation.
247550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
247650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================
247750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
247950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
248050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   resetStack
248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//           Discard any previous contents of the state save stack, and initialize a
248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//           new stack frame to all -1.  The -1s are needed for capture group limits,
248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//           where they indicate that a group has not yet matched anything.
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
248650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoREStackFrame *RegexMatcher::resetStack() {
248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Discard any previous contents of the state save stack, and initialize a
248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  new stack frame with all -1 data.  The -1s are needed for capture group limits,
248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  where they indicate that a group has not yet matched anything.
249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fStack->removeAllElements();
249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus);
249350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) {
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        iFrame->fExtra[i] = -1;
249650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iFrame;
249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
250350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   isWordBoundary
250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     in perl, "xab..cd..", \b is true at positions 0,3,5,7
250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     For us,
250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       If the current char is a combining mark,
250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                          \b is FALSE.
250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       Else Scan backwards to the first non-combining char.
//                            We are at a boundary if this char and the original char are
//                               opposite in membership in the \w set
251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//          parameters:   pos   - the current position in the input buffer
251450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//              TODO:  double-check edge cases at region boundaries.
251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
251850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isWordBoundary(int64_t pos) {
251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool isBoundary = FALSE;
252050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool cIsWord    = FALSE;
252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Determine whether char c at current position is a member of the word set of chars.
252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If we're off the end of the string, behave as though we're not at a word char.
252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTEXT_SETNATIVEINDEX(fInputText, pos);
252850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32  c = UTEXT_CURRENT32(fInputText);
252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Current char is a combining one.  Not a boundary.
253150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
253350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back up until we come to a non-combining char, determine whether
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  that char is a word char.
253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool prevCIsWord = FALSE;
253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) {
254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 prevChar = UTEXT_PREVIOUS32(fInputText);
254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
254550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              || u_charType(prevChar) == U_FORMAT_CHAR)) {
254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
254950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isBoundary = cIsWord ^ prevCIsWord;
255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isBoundary;
255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
255450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isChunkWordBoundary(int32_t pos) {
255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool isBoundary = FALSE;
255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool cIsWord    = FALSE;
255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *inputBuf = fInputText->chunkContents;
255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Determine whether char c at current position is a member of the word set of chars.
256450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If we're off the end of the string, behave as though we're not at a word char.
256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_GET(inputBuf, fLookStart, pos, fLookLimit, c);
256750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Current char is a combining one.  Not a boundary.
256950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back up until we come to a non-combining char, determine whether
257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  that char is a word char.
257650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool prevCIsWord = FALSE;
257750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (pos <= fLookStart) {
257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 prevChar;
258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_PREV(inputBuf, fLookStart, pos, prevChar);
258350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              || u_charType(prevChar) == U_FORMAT_CHAR)) {
258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
258950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isBoundary = cIsWord ^ prevCIsWord;
259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isBoundary;
259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   isUWordBoundary
259650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         Test for a word boundary using RBBI word break.
259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//          parameters:   pos   - the current position in the input buffer
260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
260250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isUWordBoundary(int64_t pos) {
260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool       returnVal = FALSE;
260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
260550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // If we haven't yet created a break iterator for this matcher, do it now.
260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fWordBreakItr == NULL) {
260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fWordBreakItr =
260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus);
261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(fDeferredStatus)) {
261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
261250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fWordBreakItr->setText(fInputText, fDeferredStatus);
261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
261550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
261850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = TRUE;   // With Unicode word rules, only positions within the interior of "real"
261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            //    words are not boundaries.  All non-word chars stand by themselves,
262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            //    with word boundaries on both sides.
262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!UTEXT_USES_U16(fInputText)) {
262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // !!!: Would like a better way to do this!
262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode status = U_ZERO_ERROR;
262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            pos = utext_extract(fInputText, 0, pos, NULL, 0, &status);
262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = fWordBreakItr->isBoundary((int32_t)pos);
262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return   returnVal;
263150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   IncrementTime     This function is called once each TIMER_INITIAL_VALUE state
263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     saves. Increment the "time" counter, and call the
263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     user callback function if there is one installed.
263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
263950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     If the match operation needs to be aborted, either for a time-out
264050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     or because the user callback asked for it, just set an error status.
264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     The engine will pick that up and stop in its outer loop.
264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
264450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::IncrementTime(UErrorCode &status) {
264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter = TIMER_INITIAL_VALUE;
264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTime++;
264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fCallbackFn != NULL) {
264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) {
264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_REGEX_STOPPED_BY_CALLER;
265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return;
265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
265250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTimeLimit > 0 && fTime >= fTimeLimit) {
265450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_TIME_OUT;
265550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
266027f654740f2a26ad62a5c155af9199af9e69b889claireho//   ReportFindProgress     This function is called once for each advance in the target
266127f654740f2a26ad62a5c155af9199af9e69b889claireho//                          string from the find() function, and calls the user progress callback
266227f654740f2a26ad62a5c155af9199af9e69b889claireho//                          function if there is one installed.
266327f654740f2a26ad62a5c155af9199af9e69b889claireho//
266427f654740f2a26ad62a5c155af9199af9e69b889claireho//                          NOTE:
266527f654740f2a26ad62a5c155af9199af9e69b889claireho//
266627f654740f2a26ad62a5c155af9199af9e69b889claireho//                          If the match operation needs to be aborted because the user
266727f654740f2a26ad62a5c155af9199af9e69b889claireho//                          callback asked for it, just set an error status.
266827f654740f2a26ad62a5c155af9199af9e69b889claireho//                          The engine will pick that up and stop in its outer loop.
266927f654740f2a26ad62a5c155af9199af9e69b889claireho//
267027f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
267127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::ReportFindProgress(int64_t matchIndex, UErrorCode &status) {
267227f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fFindProgressCallbackFn != NULL) {
267327f654740f2a26ad62a5c155af9199af9e69b889claireho        if ((*fFindProgressCallbackFn)(fFindProgressCallbackContext, matchIndex) == FALSE) {
267427f654740f2a26ad62a5c155af9199af9e69b889claireho            status = U_ZERO_ERROR /*U_REGEX_STOPPED_BY_CALLER*/;
267527f654740f2a26ad62a5c155af9199af9e69b889claireho            return FALSE;
267627f654740f2a26ad62a5c155af9199af9e69b889claireho        }
267727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
267827f654740f2a26ad62a5c155af9199af9e69b889claireho    return TRUE;
267927f654740f2a26ad62a5c155af9199af9e69b889claireho}
268027f654740f2a26ad62a5c155af9199af9e69b889claireho
268127f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
268227f654740f2a26ad62a5c155af9199af9e69b889claireho//
268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   StateSave
268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Make a new stack frame, initialized as a copy of the current stack frame.
268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Set the pattern index in the original stack frame from the operand value
268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       in the opcode.  Execution of the engine continues with the state in
268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       the newly created stack frame
268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
268950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Note that reserveBlock() may grow the stack, resulting in the
269050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       whole thing being relocated in memory.
269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Parameters:
269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       fp           The top frame pointer when called.  At return, a new
269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    frame will be present
269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       savePatIdx   An index into the compiled pattern.  Goes into the original
269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    (not new) frame.  If execution ever back-tracks out of the
269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    new frame, this will be where we continue from in the pattern.
269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Return
269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    The new frame pointer.
270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) {
270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // push storage for a new frame.
270450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *newFP = fStack->reserveBlock(fFrameSize, status);
270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (newFP == NULL) {
270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Failure on attempted stack expansion.
270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   Stack function set some other error code, change it to a more
270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   specific one for regular expressions.
270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_STACK_OVERFLOW;
271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // We need to return a writable stack frame, so just return the
271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    previous frame.  The match operation will stop quickly
271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    because of the error status, after which the frame will never
271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    be looked at again.
271450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return fp;
271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp = (REStackFrame *)(newFP - fFrameSize);  // in case of realloc of stack.
271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // New stack frame = copy of old top frame.
271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *source = (int64_t *)fp;
272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *dest   = newFP;
272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *dest++ = *source++;
272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (source == newFP) {
272450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
272550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
272650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
272750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
272850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter--;
272950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTickCounter <= 0) {
273050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       IncrementTime(status);    // Re-initializes fTickCounter
273150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
273250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fPatIdx = savePatIdx;
273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return (REStackFrame *)newFP;
273450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   MatchAt      This is the actual matching engine.
274050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
274150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  startIdx:    begin matching at this index.
274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  toEnd:       if true, match must extend to end of the input region
274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
274550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool       isMatch  = FALSE;      // True if the we have a match.
274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t     backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards
274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     op;                    // Operation from the compiled pattern, split into
275150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     opType;                //    the opcode
275250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     opValue;               //    and the operand value.
275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    #ifdef REGEX_RUN_DEBUG
275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTraceDebug)
275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("MatchAt(startIdx=%ld)\n", startIdx);
275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("Original Pattern: ");
275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c = utext_next32From(fPattern->fPattern, 0);
276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
276350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fPattern->fPattern);
276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
276950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("Input String: ");
277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c = utext_next32From(fInputText, 0);
277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            printf("%c", c);
277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fInputText);
277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    #endif
278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Cache frequently referenced items from the compiled pattern
278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             *sets          = fPattern->fSets;
279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fFrameSize = fPattern->fFrameSize;
279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REStackFrame        *fp            = resetStack();
279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fPatIdx   = 0;
279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fInputIdx = startIdx;
280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Zero out the pattern's static data
280250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i<fPattern->fDataSize; i++) {
280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fData[i] = 0;
280550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Main loop for interpreting the compiled pattern.
280950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  One iteration of the loop per pattern operation performed.
281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
281150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if 0
281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (_heapchk() != _HEAPOK) {
281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fprintf(stderr, "Heap Trouble\n");
281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        op      = (int32_t)pat[fp->fPatIdx];
281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        opType  = URX_TYPE(op);
282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        opValue = URX_VAL(op);
282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #ifdef REGEX_RUN_DEBUG
282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fTraceDebug) {
282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            printf("inputIdx=%d   inputChar=%x   sp=%3d   activeLimit=%d  ", fp->fInputIdx,
282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fPattern->dumpOp(fp->fPatIdx);
282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        #endif
282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fp->fPatIdx++;
283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        switch (opType) {
283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_NOP:
283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKTRACK:
283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Force a backtrack.  In some circumstances, the pattern compiler
284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   will notice that the pattern can't possibly match anything, and will
284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   emit one of these at that point.
284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_ONECHAR:
284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fActiveLimit) {
284850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == opValue) {
285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
285250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
285550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
285650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
285750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #ifdef REGEX_SMART_BACKTRACKING
285950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
286050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
286250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UBool success = FALSE;
286350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c = UTEXT_PREVIOUS32(fInputText);
286450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex) {
286550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (c == opValue) {
286650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = TRUE;
286750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
286850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else if (c == U_SENTINEL) {
286950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
287050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
287150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c = UTEXT_PREVIOUS32(fInputText);
287250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (success) {
287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = FALSE;
287550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex) {
287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = StateSave(fp, fp->fPatIdx, status);
287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fPatIdx++; // Skip the LOOP_C, we just did that
288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
288350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #endif
288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STRING:
289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test input against a literal string.
289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Strings require two slots in the compiled pattern, one for the
289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   offset to the string text, and one for the length.
289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t   stringStartIdx = opValue;
289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t   stringLen;
289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                opType    = URX_TYPE(op);
290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                stringLen = URX_VAL(op);
290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opType == URX_STRING_LEN);
290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stringLen >= 2);
290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *patternChars = litText+stringStartIdx;
290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const UChar *patternEnd = patternChars+stringLen;
290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = TRUE;
291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                while (patternChars < patternEnd && success) {
291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c != U_SENTINEL && UTEXT_GETNATIVEINDEX(fInputText) <= fActiveLimit) {
291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (U_IS_BMP(c)) {
291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = (*patternChars == c);
291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            patternChars += 1;
292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else if (patternChars+1 < patternEnd) {
292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c));
292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            patternChars += 2;
292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = FALSE;
292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;          //   TODO:  See ticket 6074
292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
293050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
293250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #ifdef REGEX_SMART_BACKTRACKING
293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx > backSearchIndex && fStack->size()) {
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
293750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // Reset to last start point
293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
293950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            patternChars = litText+stringStartIdx;
294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // Search backwards for a possible start
294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            do {
294350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                c = UTEXT_PREVIOUS32(fInputText);
294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (c == U_SENTINEL) {
294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
294650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } else if ((U_IS_BMP(c) && *patternChars == c) ||
294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c))) {
294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex);
295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // And try again
295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (success) {
295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (fp->fInputIdx > backSearchIndex) {
295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = StateSave(fp, fp->fPatIdx, status);
295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fPatIdx++; // Skip the LOOP_C, we just did that
296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #endif
296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STATE_SAVE:
297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = StateSave(fp, opValue, status);
297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_END:
297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The match loop will exit via this path on a successful match,
297950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   when we reach the end of the pattern.
298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (toEnd && fp->fInputIdx != fActiveLimit) {
298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The pattern matched, but not to the end of input.  Try some more.
298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = TRUE;
298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto  breakFromLoop;
298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Start and End Capture stack frame variables are laid out like this:
298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  fp->fExtra[opValue]  - The start of a completed capture group
299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //             opValue+1 - The end   of a completed capture group
299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //             opValue+2 - the start of a capture group whose end
299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //                          has not yet been reached (and might not ever be).
299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_START_CAPTURE:
299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
299550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue+2] = fp->fInputIdx;
299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_END_CAPTURE:
300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR:                   //  $, test for End of line
300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                           //     or for position before new line at end of input
301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are positioned just before a new-line that is located at the
302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   end of input, succeed.
302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // If not in the middle of a CR/LF sequence
3026b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                      if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) {
302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // At new-line at end of input. Success
302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;
302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fRequireEnd = TRUE;
303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 nextC = UTEXT_NEXT32(fInputText);
303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fRequireEnd = TRUE;
303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;                         // At CR/LF at end of input.  Success
304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
304350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
304450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fAnchorLimit) {
305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Off the end of input.  Success.
305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fRequireEnd = TRUE;
305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Either at the last character of input, or off the end.
305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) {
305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
306250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Not at end of input.  Back-track out.
306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             {
307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (fp->fInputIdx >= fAnchorLimit) {
307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // We really are at the end of input.  Success.
307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fHitEnd = TRUE;
307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fRequireEnd = TRUE;
307650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     break;
307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // If we are positioned just before a new-line, succeed.
307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // It makes no difference where the new-line is within the input.
308050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UChar32 c = UTEXT_CURRENT32(fInputText);
308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     //  In multi-line mode, hitting a new-line just before the end of input does not
308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     //   set the hitEnd or requireEnd flags
308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) {
308750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     }
308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // not at a new line.  Fail.
309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 fp = (REStackFrame *)fStack->popFrame(fFrameSize);
309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             }
309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             break;
309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             {
309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (fp->fInputIdx >= fAnchorLimit) {
309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // We really are at the end of input.  Success.
310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fHitEnd = TRUE;
310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     break;               //   adding a new-line would not lose the match.
310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // If we are not positioned just before a new-line, the test fails; backtrack out.
310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // It makes no difference where the new-line is within the input.
310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (UTEXT_CURRENT32(fInputText) != 0x0a) {
310850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             }
311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             break;
311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET:                    //  ^, test for start of line
311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx != fAnchorStart) {
311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
311950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET_M:                   //  ^, test for start of line in multi-line mode
312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           {
312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (fp->fInputIdx == fAnchorStart) {
312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // We are at the start of input.  Success.
312550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
312650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
312750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Check whether character just before the current pos is a new-line
312850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               //   unless we are at the end of input
312950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
313050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UChar32  c = UTEXT_PREVIOUS32(fInputText);
313150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if ((fp->fInputIdx < fAnchorLimit) &&
313250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
313350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   //  It's a new-line.  ^ is true.  Success.
313450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   //  TODO:  what should be done with positions between a CR and LF?
313550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
313650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
313750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Not at the start of a line.  Fail.
313850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               fp = (REStackFrame *)fStack->popFrame(fFrameSize);
313950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
314050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           break;
314150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
314250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
314350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET_M_UNIX:       //  ^, test for start of line in multi-line + Unix-line mode
314450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           {
314550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               U_ASSERT(fp->fInputIdx >= fAnchorStart);
314650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (fp->fInputIdx <= fAnchorStart) {
314750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // We are at the start of input.  Success.
314850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
314950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
315050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Check whether character just before the current pos is a new-line
315150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               U_ASSERT(fp->fInputIdx <= fAnchorLimit);
315250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
315350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UChar32  c = UTEXT_PREVIOUS32(fInputText);
315450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (c != 0x0a) {
315550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // Not at the start of a line.  Back-track out.
315650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   fp = (REStackFrame *)fStack->popFrame(fFrameSize);
315750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
315850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
315950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           break;
316050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
316150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_B:          // Test for word boundaries
316250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
316350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isWordBoundary(fp->fInputIdx);
316450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                success ^= (opValue != 0);     // flip sense for \B
316550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
316650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
316750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
316850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
316950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
317050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
317250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
317350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
317450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isUWordBoundary(fp->fInputIdx);
317550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                success ^= (opValue != 0);     // flip sense for \B
317650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
317750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
317850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
317950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
318050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
318150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
318250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
318350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_D:            // Test for decimal digit
318450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
318550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
318650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
318750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                success ^= (opValue != 0);        // flip sense for \D
319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
319950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_G:          // Test for position at end of previous match
320727f654740f2a26ad62a5c155af9199af9e69b889claireho            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_X:
321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Match a Grapheme, as defined by Unicode TR 29.
321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Differs slightly from Perl, which consumes combining marks independently
321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    of context.
321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fail if at end of input
322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
322750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Examine (and consume) the current char.
322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Dispatch into a little state machine, based on the char.
323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
323150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeSet **sets = fPattern->fStaticSets;
323450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_L]->contains(c))       goto GC_L;
323750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
324150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
324550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_L:
324650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_L]->contains(c))       goto GC_L;
325050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
325250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
3253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
325450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
325550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
325650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
325750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_V:
325850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
325950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
326050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
326150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
326250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
3263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
326550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
326650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
326750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_T:
326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
327050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
3272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
327350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
327450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
327650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Extend:
327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Combining characters are consumed here
327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
327950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx >= fActiveLimit) {
328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_CURRENT32(fInputText);
328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    (void)UTEXT_NEXT32(fInputText);
328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Done;
329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Control:
329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Most control chars stand alone (don't combine with combining chars),
329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   except for that CR/LF sequence is a single grapheme cluster.
329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Done:
330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
330750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_Z:          // Test for end of input: succeeds only once fInputIdx has reached fAnchorLimit
331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fAnchorLimit) {
331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // input remains before the anchor limit: fail and backtrack
331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;       // the test looked at the end of the input
331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fRequireEnd = TRUE;   // NOTE(review): presumably backs the matcher's requireEnd() query - confirm
331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STATIC_SETREF:
332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test one input character against one of the pattern's predefined sets
332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    (Word Characters, for example), looked up by opValue in fStaticSets8 / fStaticSets.
332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The URX_NEG_SET bit of the op value is a flag for the match polarity.
332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    clear: success if input char is in set.
332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    set:   success if input char is not in set.
332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
333250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);   // starts TRUE only when the negation flag is set
333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                opValue &= ~URX_NEG_SET;   // strip the flag; what is left indexes the static set tables
333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
333650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c < 256) {   // code points below 256 use the Regex8BitSet table, all others the UnicodeSet
334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = !success;   // membership inverts the initial polarity
334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
334750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = !success;
334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);   // consume the character
335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // the character failed the (possibly negated) set test.
335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #ifdef REGEX_SMART_BACKTRACKING
335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // The saved frame sits on a LOOP_C op: try to find a usable char by scanning backwards
335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UTEXT_PREVIOUS32(fInputText); // skip the first character we tried
336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = ((opValue & URX_NEG_SET) == URX_NEG_SET); // reset. NOTE(review): URX_NEG_SET was already cleared from opValue above, so the negation flag looks lost here - confirm
336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            do {
336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                c = UTEXT_PREVIOUS32(fInputText);
336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (c == U_SENTINEL) {
336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } else if (c < 256) {
336650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (s8->contains(c)) {
336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        success = !success;
336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } else {
337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (s->contains(c)) {
337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        success = !success;
337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
337550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex && !success);
337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (success && c != U_SENTINEL) {
337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (fp->fInputIdx > backSearchIndex) {
338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = StateSave(fp, fp->fPatIdx, status);
338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fPatIdx++; // Skip the LOOP_C, we just did that
338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #endif
339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match here: backtrack
339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
339350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STAT_SETREF_N:
339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test one input character for NOT being a member of the predefined set
339950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    selected by opValue (Word Characters, for example); consume it if so.
340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
340950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
341150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c < 256) {   // code points below 256 use the Regex8BitSet table, all others the UnicodeSet
341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c) == FALSE) {
341450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);   // not a member: consume the char, op succeeds
341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c) == FALSE) {
342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character WAS in the set, so this negated test failed.
342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The saved frame sits on a LOOP_C op: scan backwards for a char that is not in the set
343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_PREVIOUS32(fInputText); // skip the first character we tried
343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UBool success = FALSE;
343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        do {
343350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            c = UTEXT_PREVIOUS32(fInputText);
343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c == U_SENTINEL) {
343550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else if (c < 256) {
343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
343850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s8->contains(c) == FALSE) {
343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else {
344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                const UnicodeSet *s = fPattern->fStaticSets[opValue];
344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s->contains(c) == FALSE) {
344550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex);
345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (success) {
345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (fp->fInputIdx > backSearchIndex) {
345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = StateSave(fp, fp->fPatIdx, status);
345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fPatIdx++; // Skip the LOOP_C, we just did that
345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match here: backtrack
346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
346550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_SETREF:   // Match one input character against the pattern's set number opValue
346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Pick up one char and test it for set membership.
347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c<256) {   // code points below 256 use the fSets8 table, all others the UnicodeSet held in sets
348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);   // member: consume the char, op succeeds
348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character wasn't in the set.
349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
349650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The saved frame sits on a LOOP_C op: scan backwards for a char that is in the set
350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_PREVIOUS32(fInputText); // skip the first character we tried
350150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UBool success = FALSE;
350250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        do {
350350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            c = UTEXT_PREVIOUS32(fInputText);
350450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c == U_SENTINEL) {
350550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
350650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else if (c < 256) {
350750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
350850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s8->contains(c)) {
350950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
351050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
351150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
351250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else {
351350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s->contains(c)) {
351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
351850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
351950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex);
352050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (success) {
352250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
352350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
352450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (fp->fInputIdx > backSearchIndex) {
352550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = StateSave(fp, fp->fPatIdx, status);
352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
352750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fPatIdx++; // Skip the LOOP_C, we just did that
352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
353050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
353250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // no match here: backtrack
353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
353550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
353650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
353750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY:
353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // . consumes any single character except a line terminator (and fails at end of input).
354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, unless it is one of the line terminators below
355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
355250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (((c & 0x7f) <= 0x29) &&     // Cheap pre-filter: every terminator tested on the next line has low 7 bits <= 0x29
355350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // End of line in normal mode.   . does not match; fInputIdx is left unchanged.
355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
355750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
355850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);   // ordinary character: consume it
355950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
356050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
356150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
356250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
356350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY_ALL:
356450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
356550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // ., in dot-matches-all (including new lines) mode: always consumes when input remains
356650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, except if we are
357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   at a cr/lf pair (0x0d 0x0a), advance over both of them as a unit.
357750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
357950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
358050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Saw a CR with input still remaining: peek (without consuming) for a following LF.
358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 nextc = UTEXT_CURRENT32(fInputText);
358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (nextc == 0x0a) {
3584b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_NEXT32(fInputText);
358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
358950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
359050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
359150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY_UNIX:
359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // '.' operator, matches all, but stops at end-of-line.
359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
359850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, unless we've hit end-of-line
360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a) {
360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // End of line in normal mode.   '.' does not match the \n
360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
361250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
361350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
361750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP:
361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fPatIdx = opValue;
361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_FAIL:
362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = FALSE;
362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto breakFromLoop;
362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP_SAV:
362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue < fPattern->fCompiledPat->size());
362750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
362850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fPatIdx = opValue;                         // Then JMP.
362950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
363050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
363150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP_SAV_X:
363250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This opcode is used with (x)+, when x can match a zero length string.
363350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Same as JMP_SAV, except conditional on the match having made forward progress.
363450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
363550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   data address of the input position at the start of the loop.
363650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
363750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
363850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  stoOp = (int32_t)pat[opValue-1];
363950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
364050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  frameLoc = URX_VAL(stoOp);
364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t prevInputIdx = fp->fExtra[frameLoc];
364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(prevInputIdx <= fp->fInputIdx);
364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (prevInputIdx < fp->fInputIdx) {
364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The match did make progress.  Repeat the loop.
364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue;
364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fExtra[frameLoc] = fp->fInputIdx;
364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If the input position did not advance, we do nothing here,
365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   execution will fall out of the loop.
365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_INIT:
365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[opValue] = 0;       //  Set the loop counter variable to zero
365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Pick up the three extra operands that CTR_INIT has, and
366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    skip the pattern location counter past
366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 3;
366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minCount>=0);
366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(maxCount>=minCount || maxCount==-1);
366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(loopLoc>fp->fPatIdx);
367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (minCount == 0) {
367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, loopLoc+1, status);
367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (maxCount == 0) {
367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_LOOP:
368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
368350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
368450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
368550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
368650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
368750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
368850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Increment the counter.  Note: we DIDN'T worry about counter
368950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   overflow, since the data comes from UnicodeStrings, which
369050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   stores its length in an int32_t. Do we have to think about
369150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   this now that we're using UText? Probably not, since the length
369250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   in UChar32s is still an int32_t.
369350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (*pCounter)++;
369450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(*pCounter > 0);
369550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(*pCounter == maxCount || maxCount == -1);
369750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
369850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
369950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*pCounter >= minCount) {
370050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, fp->fPatIdx, status);
370150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
370250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx = opValue + 4;    // Loop back.
370350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
370550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
370650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_INIT_NG:
370750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
370850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Initialize a non-greedy loop
370950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
371050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[opValue] = 0;       //  Set the loop counter variable to zero
371150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
371250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Pick up the three extra operands that CTR_INIT has, and
371350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    skip the pattern location counter past
371450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
371550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 3;
371650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
371750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
371850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
371950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minCount>=0);
372050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(maxCount>=minCount || maxCount==-1);
372150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(loopLoc>fp->fPatIdx);
372250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
372350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (minCount == 0) {
372450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (maxCount != 0) {
372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = StateSave(fp, fp->fPatIdx, status);
372650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
372850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
372950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
373050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
373150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
373250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_LOOP_NG:
373350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
373450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Non-greedy {min, max} loops
373550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
373650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
373750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
373850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
373950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
374050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
374150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Increment the counter.  Note: we DIDN'T worry about counter
374250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   overflow, since the data comes from UnicodeStrings, which
374350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   stores its length in an int32_t. Do we have to think about
374450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   this now that we're using UText? Probably not, since the length
374550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   in UChar32s is still an int32_t.
374650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (*pCounter)++;
374750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(*pCounter > 0);
374850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
374950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
375050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The loop has matched the maximum permitted number of times.
375150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Break out of here with no action.  Matching will
375250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   continue with the following pattern.
375350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(*pCounter == maxCount || maxCount == -1);
375450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
375550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
375650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
375750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*pCounter < minCount) {
375850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We haven't met the minimum number of matches yet.
375950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Loop back for another one.
376050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue + 4;    // Loop back.
376150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
376250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We do have the minimum number of matches.
376350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Fall into the following pattern, but first do
376450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   a state save to the top of the loop, so that a failure
376550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   in the following pattern will try another iteration of the loop.
376650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, opValue + 4, status);
376750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
376850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STO_SP:
377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fData[opValue] = fStack->size();
377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LD_SP:
377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(newStackSize <= fStack->size());
378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (newFP == (int64_t *)fp) {
378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<fFrameSize; i++) {
378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    newFP[i] = ((int64_t *)fp)[i];
378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)newFP;
379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fStack->setSize(newStackSize);
379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKREF:
379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKREF_I:
379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue < fFrameSize);
379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupStartIdx = fp->fExtra[opValue];
379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupEndIdx   = fp->fExtra[opValue+1];
380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(groupStartIdx <= groupEndIdx);
380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (groupStartIdx < 0) {
380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // This capture group has not participated in the match thus far,
380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (groupEndIdx == groupStartIdx) {
380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   The capture group match was of an empty string.
380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Verified by testing:  Perl matches succeed in this case, so
380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   we do too.
381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx);
381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool haveMatch = (opType == URX_BACKREF ?
381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (0 == utext_compareNativeLimit(fAltInputText, groupEndIdx, fInputText, -1)) :
381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (0 == utext_caseCompareNativeLimit(fAltInputText, groupEndIdx, fInputText, -1, U_FOLD_CASE_DEFAULT, &status)));
381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx > fActiveLimit) {
382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if (!haveMatch) {
382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx == fActiveLimit) {
382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STO_INP_LOC:
383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[opValue] = fp->fInputIdx;
383750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
383850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
383950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMPX:
384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 1;
384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t savedInputIdx = fp->fExtra[dataLoc];
384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(savedInputIdx <= fp->fInputIdx);
384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (savedInputIdx < fp->fInputIdx) {
384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue;                               // JMP
385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
385150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LA_START:
385750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Entering a lookahead block.
385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save Stack Ptr, Input Pos.
386050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
386150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue]   = fStack->size();
386250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+1] = fp->fInputIdx;
386350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveStart     = fLookStart;          // Set the match region change for
386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit     = fLookLimit;          //   transparent bounds.
386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LA_END:
386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
387050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Leaving a look-ahead block.
387150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
387250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
387350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackSize = fStack->size();
387450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize =(int32_t)fData[opValue];
387550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackSize >= newStackSize);
387650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (stackSize > newStackSize) {
387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Copy the current top frame back to the new (cut back) top frame.
387850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   This makes the capture groups from within the look-ahead
387950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   expression available.
388050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
388150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t i;
388250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    for (i=0; i<fFrameSize; i++) {
388350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newFP[i] = ((int64_t *)fp)[i];
388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
388550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)newFP;
388650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fStack->setSize(newStackSize);
388750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
388850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = fData[opValue+1];
388950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Restore the active region bounds in the input string; they may have
389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    been changed because of transparent bounds on a Region.
389250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveStart = fRegionStart;
389350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = fRegionLimit;
389450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
389550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
389650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
389750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_ONECHAR_I:
389850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fActiveLimit) {
389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #ifdef REGEX_SMART_BACKTRACKING
391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UBool success = FALSE;
391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c = UTEXT_PREVIOUS32(fInputText);
391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex) {
391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = TRUE;
391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else if (c == U_SENTINEL) {
392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        c = UTEXT_PREVIOUS32(fInputText);
392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (success) {
392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = FALSE;
392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex) {
393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = StateSave(fp, fp->fPatIdx, status);
393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fPatIdx++; // Skip the LOOP_C, we just did that
393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #endif
393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STRING_I:
394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test input against a literal string.
394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Strings require two slots in the compiled pattern, one for the
394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   offset to the string text, and one for the length.
394727f654740f2a26ad62a5c155af9199af9e69b889claireho                const UCaseProps *csp = ucase_getSingleton();
394827f654740f2a26ad62a5c155af9199af9e69b889claireho                {
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t stringStartIdx, stringLen;
395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    stringStartIdx = opValue;
395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    op      = (int32_t)pat[fp->fPatIdx];
395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;
395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opType  = URX_TYPE(op);
395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opValue = URX_VAL(op);
395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(opType == URX_STRING_LEN);
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    stringLen = opValue;
395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *patternChars = litText+stringStartIdx;
396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *patternEnd = patternChars+stringLen;
396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *foldChars = NULL;
396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t foldOffset, foldLength;
396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c;
396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    foldOffset = foldLength = 0;
396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UBool success = TRUE;
396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while (patternChars < patternEnd && success) {
397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if(foldOffset < foldLength) {
397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            U16_NEXT_UNSAFE(foldChars, foldOffset, c);
397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else {
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            c = UTEXT_NEXT32(fInputText);
397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c != U_SENTINEL) {
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if(foldLength >= 0) {
397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        foldOffset = 0;
398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        U16_NEXT_UNSAFE(foldChars, foldOffset, c);
398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    } else {
398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        c = foldLength;
398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        foldLength = foldOffset; // to avoid reading chars from the folding buffer
398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
398750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = FALSE;
399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (c != U_SENTINEL && (fp->fInputIdx <= fActiveLimit)) {
399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (U_IS_BMP(c)) {
399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                success = (*patternChars == c);
399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars += 1;
399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else if (patternChars+1 < patternEnd) {
399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c));
399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars += 2;
399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else {
400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;          //   TODO:  See ticket 6074
400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (!success) {
400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        #ifdef REGEX_SMART_BACKTRACKING
400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex && fStack->size()) {
400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // Reset to last start point
401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars = litText+stringStartIdx;
401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // Search backwards for a possible start
401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                do {
401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    c = UTEXT_PREVIOUS32(fInputText);
401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (c == U_SENTINEL) {
401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        break;
401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    } else {
402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        if(foldLength >= 0) {
402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                foldOffset = 0;
402450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                U16_NEXT_UNSAFE(foldChars, foldOffset, c);
402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            } else {
402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                c = foldLength;
402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                                foldLength = foldOffset; // to avoid reading chars from the folding buffer
402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            }
402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        }
403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        if ((U_IS_BMP(c) && *patternChars == c) ||
403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                               (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c))) {
403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            success = TRUE;
403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            break;
403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        }
403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } while (UTEXT_GETNATIVEINDEX(fInputText) >= backSearchIndex);
403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // And try again
404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (success) {
404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (fp->fInputIdx > backSearchIndex) {
404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        fp = StateSave(fp, fp->fPatIdx, status);
404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp->fPatIdx++; // Skip the LOOP_C, we just did that
404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        #endif
405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_START:
405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Entering a look-behind block.
406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save Stack Ptr, Input Pos.
406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   TODO:  implement transparent bounds.  Ticket #6067
406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
406450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue]   = fStack->size();
406550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+1] = fp->fInputIdx;
406650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Init the variable containing the start index for attempted matches.
406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+2] = -1;
406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save input string length, then reset to pin any matches to end at
406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   the current position.
407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+3] = fActiveLimit;
407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit     = fp->fInputIdx;
407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_CONT:
407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Positive Look-Behind, at top of loop checking for matches of LB expression
407950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    at all possible input starting positions.
408050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch the min and max possible match lengths.  They are the operands
408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   of this op in the pattern.
408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML = (int32_t)pat[fp->fPatIdx++];
408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML <= maxML);
408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML >= 0);
408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch (from data) the last input index where a match was attempted.
408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0) {
409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // First time through loop.
409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *lbStartIdx = fp->fInputIdx - minML;
409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // 2nd through nth time through the loop.
409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Back up start position for match by one.
409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (*lbStartIdx == 0) {
409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
4101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(fInputText);
410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We have tried all potential match starting points without
410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  getting a match.  Backtrack out, and out of the
410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Look Behind altogether.
411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen >= fActiveLimit);
411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fActiveLimit = restoreInputLen;
411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //      (successful match will fall off the end of the loop.)
412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-3, status);
412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = *lbStartIdx;
412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_END:
412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // End of a look-behind block, after a successful match.
412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx != fActiveLimit) {
413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  The look-behind expression matched, but the match did not
413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //    extend all the way to the point that we are looking behind from.
413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  FAIL out of here, which will take us back to the LB_CONT, which
413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     will retry the match starting at another position or fail
413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     the look-behind altogether, whichever is appropriate.
413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Look-behind match is good.  Restore the original input string length,
414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   which had been truncated to pin the end of the lookbehind match to the
414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   position being looked-behind.
414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen >= fActiveLimit);
414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = originalInputLen;
414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LBN_CONT:
415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Negative Look-Behind, at top of loop checking for matches of LB expression
415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    at all possible input starting positions.
415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch the extra parameters of this op.
415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continueLoc = URX_VAL(continueLoc);
416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML <= maxML);
416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML >= 0);
416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(continueLoc > fp->fPatIdx);
416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch (from data) the last input index where a match was attempted.
416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0) {
416850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // First time through loop.
416950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *lbStartIdx = fp->fInputIdx - minML;
417050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
417150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // 2nd through nth time through the loop.
417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Back up start position for match by one.
417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (*lbStartIdx == 0) {
417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
4177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(fInputText);
417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We have tried all potential match starting points without
418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  getting a match, which means that the negative lookbehind as
418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  a whole has succeeded.  Jump forward to the continue location
418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen >= fActiveLimit);
418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fActiveLimit = restoreInputLen;
419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = continueLoc;
419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //      (successful match will cause a FAIL out of the loop altogether.)
419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-4, status);
419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = *lbStartIdx;
419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LBN_END:
420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // End of a negative look-behind block, after a successful match.
420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx != fActiveLimit) {
420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  The look-behind expression matched, but the match did not
420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //    extend all the way to the point that we are looking behind from.
420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  FAIL out of here, which will take us back to the LBN_CONT, which
420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     will retry the match starting at another position or succeed
421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     the look-behind altogether, whichever is appropriate.
421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
421350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Look-behind expression matched, which means look-behind test as
421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   a whole Fails
421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Restore the orignal input string length, which had been truncated
421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   inorder to pin the end of the lookbehind match
422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   to the position being looked-behind.
422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen >= fActiveLimit);
422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = originalInputLen;
422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
422650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Restore original stack position, discarding any state saved
422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   by the successful pattern match.
422850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fStack->size() > newStackSize);
423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fStack->setSize(newStackSize);
423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  FAIL, which will take control back to someplace
423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  prior to entering the look-behind test.
423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
423750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
423850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_SR_I:
424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Loop Initialization for the optimized implementation of
424250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     [some character set]*
424350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   This op scans through all matching input.
424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
424950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Loop through input, until either the input is exhausted or
425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   we reach a character that is not a member of the set.
425250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t ix = fp->fInputIdx;
425350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, ix);
425450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
425550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (ix >= fActiveLimit) {
425650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
425750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c = UTEXT_NEXT32(fInputText);
426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c<256) {
426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (s8->contains(c) == FALSE) {
426250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
426350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
426450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
426550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (s->contains(c) == FALSE) {
426650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
426750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
426850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
426950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = UTEXT_GETNATIVEINDEX(fInputText);
427050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
427150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
427250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If there were no matching characters, skip over the loop altogether.
427350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   The loop doesn't run at all, a * op always succeeds.
427450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (ix == fp->fInputIdx) {
427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
427750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
427950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
428050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   must follow.  It's operand is the stack location
428150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   that holds the starting input index for the match of this [set]*
428250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
428350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackLoc = URX_VAL(loopcOp);
428550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
428650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[stackLoc] = fp->fInputIdx;
428750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
428850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fInputIdx;
428950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
429050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = ix;
429150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
429250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save State to the URX_LOOP_C op that follows this one,
429350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   so that match failures in the following code will return to there.
429450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
429550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx, status);
429650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
429850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
429950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
430050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
430150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_DOT_I:
430250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Loop Initialization for the optimized implementation of .*
430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   This op scans through all remaining input.
430450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
430550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
430650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Loop through input until the input is exhausted (we reach an end-of-line)
430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // In DOTALL mode, we can just go straight to the end of the input.
430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t ix;
430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((opValue & 1) == 1) {
431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Dot-matches-All mode.  Jump straight to the end of the string.
431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = fActiveLimit;
431250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
431350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
431450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // NOT DOT ALL mode.  Line endings do not match '.'
431550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Scan forward until a line ending or end of input.
431650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = fp->fInputIdx;
431750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, ix);
431850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    for (;;) {
431950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (ix >= fActiveLimit) {
432050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;
432150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
432250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
432350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar32 c = UTEXT_NEXT32(fInputText);
432427f654740f2a26ad62a5c155af9199af9e69b889claireho                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
432527f654740f2a26ad62a5c155af9199af9e69b889claireho                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
432627f654740f2a26ad62a5c155af9199af9e69b889claireho                               (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
432727f654740f2a26ad62a5c155af9199af9e69b889claireho                                    (c<=0x0d && c>=0x0a)) || c==0x85 ||c==0x2028 || c==0x2029) {
432850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //  char is a line ending.  Exit the scanning loop.
432950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
433050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
433150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
433250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ix = UTEXT_GETNATIVEINDEX(fInputText);
433350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
433450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
433550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
433650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If there were no matching characters, skip over the loop altogether.
433750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   The loop doesn't run at all, a * op always succeeds.
433850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (ix == fp->fInputIdx) {
433950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
434050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
434150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
434250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
434350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
434450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   must follow.  It's operand is the stack location
434550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   that holds the starting input index for the match of this .*
434650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
434750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
434850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackLoc = URX_VAL(loopcOp);
434950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
435050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[stackLoc] = fp->fInputIdx;
435150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
435250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fInputIdx;
435350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
435450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = ix;
435550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
435650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save State to the URX_LOOP_C op that follows this one,
435750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   so that match failures in the following code will return to there.
435850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
435950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx, status);
436050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
436150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
436250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
436350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
436450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
436550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_C:
436650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
436750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue<fFrameSize);
436850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fExtra[opValue];
436950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(backSearchIndex <= fp->fInputIdx);
437050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (backSearchIndex == fp->fInputIdx) {
437150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We've backed up the input idx to the point that the loop started.
437250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The loop is done.  Leave here without saving state.
437350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  Subsequent failures won't come back here.
437450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
437550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
437650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Set up for the next iteration of the loop, with input index
437750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   backed up by one from the last time through,
437850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   and a state save to this instruction in case the following code fails again.
437950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   (We're going backwards because this loop emulates stack unwinding, not
438050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    the initial scan forward.)
438150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx > 0);
438250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
438350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 prevC = UTEXT_PREVIOUS32(fInputText);
438450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
438550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
438650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText);
438750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (prevC == 0x0a &&
438850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx > backSearchIndex &&
438950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    twoPrevC == 0x0d) {
439050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
439150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
439250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // .*, stepping back over CRLF pair.
439350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
439450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
439550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
439850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-1, status);
439950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
440050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
440350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
440450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        default:
440550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Trouble.  The compiled pattern contains an entry with an
440650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //           unrecognized type tag.
440750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(FALSE);
4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
440950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
441050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
441150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = FALSE;
4412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
441650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehobreakFromLoop:
441750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatch = isMatch;
441850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (isMatch) {
441950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLastMatchEnd = fMatchEnd;
442050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatchStart   = startIdx;
442150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatchEnd     = fp->fInputIdx;
442250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fTraceDebug) {
442350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%d   end=%d\n\n", fMatchStart, fMatchEnd));
4424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
442650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else
442750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
442850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fTraceDebug) {
442950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
4430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
443350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fFrame = fp;                // The active stack frame when the engine stopped.
443450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //   Contains the capture group results that we need to
443550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //    access later.
443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return;
4437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
444250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   MatchChunkAt   This is the actual matching engine. Like MatchAt, but with the
444350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  assumption that the entire string is available in the UText's
444450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  chunk buffer. For now, that means we can use int32_t indexes,
444550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  except for anything that needs to be saved (like group starts
444650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  and ends).
4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
4448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  startIdx:    begin matching at this index.
4449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  toEnd:       if true, match must extend to end of the input region
4450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
445250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
4453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       isMatch  = FALSE;      // True if the we have a match.
445450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
445550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards
4456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     op;                    // Operation from the compiled pattern, split into
4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     opType;                //    the opcode
4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     opValue;               //    and the operand value.
446050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
446150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG
4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fTraceDebug)
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
446450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("MatchAt(startIdx=%ld)\n", startIdx);
4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Original Pattern: ");
446650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c = utext_next32From(fPattern->fPattern, 0);
446750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
446850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
446950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
447050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
447150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            REGEX_DUMP_DEBUG_PRINTF(("%c", c));
447250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
447350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fPattern->fPattern);
4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Input String: ");
447750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c = utext_next32From(fInputText, 0);
447850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
4479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c<32 || c>256) {
4480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c = '.';
4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%c", c);
448350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
448450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fInputText);
4485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
448950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
449050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
449450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Cache frequently referenced items from the compiled pattern
4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
449750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
449850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
4500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector             *sets          = fPattern->fSets;
450150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
450250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar         *inputBuf      = fInputText->chunkContents;
450350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4504c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrameSize = fPattern->fFrameSize;
4505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REStackFrame        *fp            = resetStack();
450650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fp->fPatIdx   = 0;
4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fp->fInputIdx = startIdx;
450950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Zero out the pattern's static data
4511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
4512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i = 0; i<fPattern->fDataSize; i++) {
4513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fData[i] = 0;
4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
451550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Main loop for interpreting the compiled pattern.
4518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  One iteration of the loop per pattern operation performed.
4519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
4521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
4522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (_heapchk() != _HEAPOK) {
4523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fprintf(stderr, "Heap Trouble\n");
4524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
452650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
452750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        op      = (int32_t)pat[fp->fPatIdx];
4528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        opType  = URX_TYPE(op);
4529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        opValue = URX_VAL(op);
453050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG
4531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fTraceDebug) {
453250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
453350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            printf("inputIdx=%d   inputChar=%x   sp=%3d   activeLimit=%d  ", fp->fInputIdx,
453450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
4535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fPattern->dumpOp(fp->fPatIdx);
4536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
453750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
4538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fp->fPatIdx++;
453950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (opType) {
454150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
454250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_NOP:
4544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKTRACK:
4548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Force a backtrack.  In some circumstances, the pattern compiler
4549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   will notice that the pattern can't possibly match anything, and will
4550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   emit one of these at that point.
4551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
455350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
455450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_ONECHAR:
4556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fActiveLimit) {
455750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
4558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c == opValue) {
4560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
4563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
4564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
456650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #ifdef REGEX_SMART_BACKTRACKING
456750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
456850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
456950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
457050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t reverseIndex = fp->fInputIdx;
457150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c;
457250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    do {
457350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
457450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (c == opValue) {
457550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
457650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
457750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } while (reverseIndex > backSearchIndex);
457850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c == opValue) {
457950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = FALSE;
458050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
458150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = reverseIndex;
458250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex) {
458350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = StateSave(fp, fp->fPatIdx, status);
458450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
458550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fPatIdx++; // Skip the LOOP_C, we just did that
458650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
458750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
458850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
458950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
459050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #endif
4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
459250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
459350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
459450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
459550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STRING:
4597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input against a literal string.
4599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Strings require two slots in the compiled pattern, one for the
4600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   offset to the string text, and one for the length.
4601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t   stringStartIdx = opValue;
4602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t   stringLen;
460350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
460450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
4605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
4606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                opType    = URX_TYPE(op);
4607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                stringLen = URX_VAL(op);
4608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opType == URX_STRING_LEN);
4609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(stringLen >= 2);
461050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4611c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx + stringLen > fActiveLimit) {
4612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // No match.  String is longer than the remaining input text.
4613c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;          //   TODO:  See ticket 6074
4614c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
461750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pInp = inputBuf + fp->fInputIdx;
4619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pPat = litText+stringStartIdx;
4620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pEnd = pInp + stringLen;
462150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = FALSE;
4622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for(;;) {
4623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*pInp == *pPat) {
4624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pInp++;
4625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pPat++;
4626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (pInp == pEnd) {
4627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Successful Match.
462850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = TRUE;
4629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
4630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
4631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Match failed.
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
4634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
463650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
463750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
463850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx += stringLen;
463950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
464050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #ifdef REGEX_SMART_BACKTRACKING
464150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx > backSearchIndex && fStack->size()) {
464250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
464350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
464450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // Reset to last start point
464550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            int64_t reverseIndex = fp->fInputIdx;
464650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UChar32 c;
464750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            pPat = litText+stringStartIdx;
464850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
464950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // Search backwards for a possible start
465050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            do {
465150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
465250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if ((U_IS_BMP(c) && *pPat == c) ||
465350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    (*pPat == U16_LEAD(c) && *(pPat+1) == U16_TRAIL(c))) {
465450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
465550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
465650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
465750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } while (reverseIndex > backSearchIndex);
465850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
465950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // And try again
466050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (success) {
466150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fInputIdx = reverseIndex;
466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (fp->fInputIdx > backSearchIndex) {
466450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = StateSave(fp, fp->fPatIdx, status);
466550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
466650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fPatIdx++; // Skip the LOOP_C, we just did that
466750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
466850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
466950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
467050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
467150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #endif
467250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
467350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
467650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
467750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STATE_SAVE:
4679c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = StateSave(fp, opValue, status);
4680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
468150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
468250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_END:
4684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The match loop will exit via this path on a successful match,
4685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   when we reach the end of the pattern.
4686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (toEnd && fp->fInputIdx != fActiveLimit) {
4687c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // The pattern matched, but not to the end of input.  Try some more.
4688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
4690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isMatch = TRUE;
4692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto  breakFromLoop;
469350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
469450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Start and End Capture stack frame variables are laid out like this:
4695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  fp->fExtra[opValue]  - The start of a completed capture group
4696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //             opValue+1 - The end   of a completed capture group
4697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //             opValue+2 - the start of a capture group whose end
4698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                          has not yet been reached (and might not ever be).
4699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_START_CAPTURE:
4700c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
4701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue+2] = fp->fInputIdx;
4702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
470350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
470450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_END_CAPTURE:
4706c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
4707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
4708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
4709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
4710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
4711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
471250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
471350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOLLAR:                   //  $, test for End of line
471550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     or for position before new line at end of input
4716c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fAnchorLimit-2) {
4717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We are nowhere near the end of input.  Fail.
4718c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   This is the common case.  Keep it first.
4719c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4722c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx >= fAnchorLimit) {
4723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We really are at the end of input.  Success.
4724c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
4725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fRequireEnd = TRUE;
4726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
472850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we are positioned just before a new-line that is located at the
4730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   end of input, succeed.
4731c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx == fAnchorLimit-1) {
473250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
473350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c);
473450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if ((c>=0x0a && c<=0x0d) || c==0x85 || c==0x2028 || c==0x2029) {
4736c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
4737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // At new-line at end of input. Success
4738c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
4739c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fRequireEnd = TRUE;
4740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
4741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
474350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if (fp->fInputIdx == fAnchorLimit-2 &&
474450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) {
4745c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fRequireEnd = TRUE;
4747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;                         // At CR/LF at end of input.  Success
4748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
474950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
475150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
475350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
475450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
475550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
4756c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx >= fAnchorLimit-1) {
4757c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Either at the last character of input, or off the end.
4758c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx == fAnchorLimit-1) {
4759c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // At last char of input.  Success if it's a new line.
476050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (inputBuf[fp->fInputIdx] == 0x0a) {
4761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
4762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fRequireEnd = TRUE;
4763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
4764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4765c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Off the end of input.  Success.
4767c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fRequireEnd = TRUE;
4769c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
4770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
477250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Not at end of input.  Back-track out.
4774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
477650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
477750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
477850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
477950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
478050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
478150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
478250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
478350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
478450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
478550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
478650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are positioned just before a new-line, succeed.
478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It makes no difference where the new-line is within the input.
478850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = inputBuf[fp->fInputIdx];
478950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((c>=0x0a && c<=0x0d) || c==0x85 ||c==0x2028 || c==0x2029) {
479050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
479150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  In multi-line mode, hitting a new-line just before the end of input does not
479250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   set the hitEnd or requireEnd flags
479350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
4794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
479550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
479750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // not at a new line.  Fail.
479850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
479950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
480050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
480150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
480250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
480350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
480450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
480550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
480650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
480750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
480850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
480950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;               //   adding a new-line would not lose the match.
481050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
481150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are not positioned just before a new-line, the test fails; backtrack out.
481250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It makes no difference where the new-line is within the input.
481350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (inputBuf[fp->fInputIdx] != 0x0a) {
481450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
481550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
481650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
481750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
481850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
481950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
482050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET:                    //  ^, test for start of line
4821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx != fAnchorStart) {
4822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4823c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
482550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
482650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
482750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET_M:                   //  ^, test for start of line in mulit-line mode
482850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
482950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx == fAnchorStart) {
483050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We are at the start of input.  Success.
483150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
483250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
483350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether character just before the current pos is a new-line
483450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   unless we are at the end of input
483550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar  c = inputBuf[fp->fInputIdx - 1];
483650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((fp->fInputIdx < fAnchorLimit) &&
483750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
483850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  It's a new-line.  ^ is true.  Success.
483950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  TODO:  what should be done with positions between a CR and LF?
484050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
484150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
484250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Not at the start of a line.  Fail.
484350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
484450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
484550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
484650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
484750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
484850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET_M_UNIX:       //  ^, test for start of line in mulit-line + Unix-line mode
484950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
485050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx >= fAnchorStart);
485150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx <= fAnchorStart) {
485250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We are at the start of input.  Success.
485350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
485450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
485550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether character just before the current pos is a new-line
485650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx <= fAnchorLimit);
485750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar  c = inputBuf[fp->fInputIdx - 1];
485850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c != 0x0a) {
485950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Not at the start of a line.  Back-track out.
486050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
486150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
486250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
486350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
486450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_B:          // Test for word boundaries
4866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
486750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx);
4868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                success ^= (opValue != 0);     // flip sense for \B
4869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
4870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
487450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
487550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
4877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool success = isUWordBoundary(fp->fInputIdx);
4879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                success ^= (opValue != 0);     // flip sense for \B
4880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
488550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
488650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_D:            // Test for decimal digit
4888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4889c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4891c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
489450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
489550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
489650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
4898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
4899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                success ^= (opValue != 0);        // flip sense for \D
490050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
4901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
490550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
490650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_G:          // Test for position at end of previous match
490827f654740f2a26ad62a5c155af9199af9e69b889claireho            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
4909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
491250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
491350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_X:
491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Match a Grapheme, as defined by Unicode TR 29.
491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Differs slightly from Perl, which consumes combining marks independently
491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    of context.
491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
4919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Fail if at end of input
492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
4926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Examine (and consume) the current char.
492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   Dispatch into a little state machine, based on the char.
492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar32  c;
493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeSet **sets = fPattern->fStaticSets;
493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
493350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_L]->contains(c))       goto GC_L;
493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
493650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
493750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
493850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
493950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_L:
494450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
494550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
494650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_L]->contains(c))       goto GC_L;
494750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
494850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
494950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
495050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_V:
495450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
495650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
495750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
495850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
495950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_T:
496250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
496350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
496550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
496650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Extend:
496950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Combining characters are consumed here
497050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
497150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
497250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
4973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
497450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
497550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
497650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U16_BACK_1(inputBuf, 0, fp->fInputIdx);
497750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
498050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Done;
4981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Control:
498350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Most control chars stand alone (don't combine with combining chars),
498450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   except for that CR/LF sequence is a single grapheme cluster.
498550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) {
498650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx++;
498750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
4988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Done:
499050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
499150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
4992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
499350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
499450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
499550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
499650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
499750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
4998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case URX_BACKSLASH_Z:          // Test for end of Input
5000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fAnchorLimit) {
5001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
5004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fRequireEnd = TRUE;
5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
500750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
500950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STATIC_SETREF:
5011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input character against one of the predefined sets
5013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    (Word Characters, for example)
5014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // The high bit of the op value is a flag for the match polarity.
5015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    0:   success if input char is in set.
5016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    1:   success if input char is not in set.
5017c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
502250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);
5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                opValue &= ~URX_NEG_SET;
5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
502650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
502750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
5028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c < 256) {
5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s8->contains(c)) {
5032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        success = !success;
5033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s->contains(c)) {
5037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        success = !success;
5038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
504150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #ifdef REGEX_SMART_BACKTRACKING
504250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
504350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
504450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
504550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // Try to find it, backwards
504650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            int64_t reverseIndex = fp->fInputIdx;
504750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried
504850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = ((opValue & URX_NEG_SET) == URX_NEG_SET); // reset
504950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            do {
505050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
505150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (c < 256) {
505250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (s8->contains(c)) {
505450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        success = !success;
505550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } else {
505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
505850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (s->contains(c)) {
505950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        success = !success;
506050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
506150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
506250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } while (reverseIndex > backSearchIndex && !success);
506350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
506450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (success) {
506550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
506650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fInputIdx = reverseIndex;
506750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (fp->fInputIdx > backSearchIndex) {
506850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = StateSave(fp, fp->fPatIdx, status);
506950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
507050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp->fPatIdx++; // Skip the LOOP_C, we just did that
507150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
507250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
507350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
507450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
507550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #endif
5076c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
508150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STAT_SETREF_N:
5083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input character for NOT being a member of  one of
5085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    the predefined sets (Word Characters, for example)
5086c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
5087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
509150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
509350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar32  c;
5095c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c < 256) {
5097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
5098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s8->contains(c) == FALSE) {
5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
5103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s->contains(c) == FALSE) {
5104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
510850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
510950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
511050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
511250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // Try to find it, backwards
511350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        int64_t reverseIndex = fp->fInputIdx;
511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried
511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UBool success = FALSE;
511650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        do {
511750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
511850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c < 256) {
511950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
512050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s8->contains(c) == FALSE) {
512150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
512250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
512350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
512450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else {
512550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                const UnicodeSet *s = fPattern->fStaticSets[opValue];
512650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s->contains(c) == FALSE) {
512750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
512850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
512950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
513050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
513150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } while (reverseIndex > backSearchIndex);
513250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
513350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (success) {
513450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
513550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fInputIdx = reverseIndex;
513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (fp->fInputIdx > backSearchIndex) {
513750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = StateSave(fp, fp->fPatIdx, status);
513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fPatIdx++; // Skip the LOOP_C, we just did that
514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
514150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
514250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
514350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
514450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
5145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
514950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_SETREF:
515150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
515250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
515350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
515450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
5156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
515750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
515850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
515950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
516050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Pick up one char and test it for set membership.
516150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
516250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
516350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c<256) {
516450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
516550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
516650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
516750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
516850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
516950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
517050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
517150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
517250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
517350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
517450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
517550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
517650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
517750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character wasn't in the set.
517850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
517950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
518050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
518150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
518250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // Try to find it, backwards
518350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        int64_t reverseIndex = fp->fInputIdx;
518450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_BACK_1(inputBuf, backSearchIndex, reverseIndex); // skip the first character we tried
518550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UBool success = FALSE;
518650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        do {
518750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
518850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (c < 256) {
518950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
519050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s8->contains(c)) {
519150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
519250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
519350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
519450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else {
519550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
519650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (s->contains(c)) {
519750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    success = TRUE;
519850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
519950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
520050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
520150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } while (reverseIndex > backSearchIndex);
520250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
520350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (success) {
520450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
520550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fInputIdx = reverseIndex;
520650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (fp->fInputIdx > reverseIndex) {
520750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                fp = StateSave(fp, fp->fPatIdx, status);
520850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
520950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp->fPatIdx++; // Skip the LOOP_C, we just did that
521050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
521150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
521250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
5213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
521450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
521550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5216c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
521850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
521950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOTANY:
5221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // . matches anything, but stops at end-of-line.
5223c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
5224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
5225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
522950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // There is input left.  Advance over one char, unless we've hit end-of-line
523150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
5232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (((c & 0x7f) <= 0x29) &&     // First quickly bypass as many chars as possible
5234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ((c<=0x0d && c>=0x0a) || c==0x85 ||c==0x2028 || c==0x2029)) {
5235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // End of line in normal mode.   . does not match.
523650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
524150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
524250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOTANY_ALL:
5244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
524550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // . in dot-matches-all (including new lines) mode
5246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
5247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
5248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
525250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // There is input left.  Advance over one char, except if we are
5254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   at a cr/lf, advance over both of them.
5255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar32 c;
5256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
5258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // In the case of a CR/LF, we need to advance over both.
525950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (inputBuf[fp->fInputIdx] == 0x0a) {
526050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit);
5261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
526550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
526650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case URX_DOTANY_UNIX:
5268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5269c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // '.' operator, matches all, but stops at end-of-line.
5270c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
5271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
5272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
5273c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
527750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // There is input left.  Advance over one char, unless we've hit end-of-line
527950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
5280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c == 0x0a) {
5282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // End of line in normal mode.   '.' does not match the \n
5283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
528750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
528850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP:
5290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fPatIdx = opValue;
5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
529250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_FAIL:
5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isMatch = FALSE;
5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto breakFromLoop;
529650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP_SAV:
5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(opValue < fPattern->fCompiledPat->size());
5299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
5300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp->fPatIdx = opValue;                         // Then JMP.
5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
530250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP_SAV_X:
5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This opcode is used with (x)+, when x can match a zero length string.
5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Same as JMP_SAV, except conditional on the match having made forward progress.
5306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   data address of the input position at the start of the loop.
5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
531050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  stoOp = (int32_t)pat[opValue-1];
5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t  frameLoc = URX_VAL(stoOp);
5313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
531450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc];
5315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(prevInputIdx <= fp->fInputIdx);
5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (prevInputIdx < fp->fInputIdx) {
5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // The match did make progress.  Repeat the loop.
5318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue;
5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fExtra[frameLoc] = fp->fInputIdx;
5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If the input position did not advance, we do nothing here,
5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   execution will fall out of the loop.
5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
532650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_INIT:
5328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
5330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[opValue] = 0;       //  Set the loop counter variable to zero
533150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Pick up the three extra operands that CTR_INIT has, and
5333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    skip the pattern location counter past
533450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 3;
5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
533750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
533850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minCount>=0);
5340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(maxCount>=minCount || maxCount==-1);
5341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(loopLoc>fp->fPatIdx);
534250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (minCount == 0) {
5344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, loopLoc+1, status);
5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (maxCount == 0) {
5347c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
535150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_LOOP:
5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
535550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
5356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
535750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
535850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
535950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
536050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Increment the counter.  Note: we DIDN'T worry about counter
5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   overflow, since the data comes from UnicodeStrings, which
536250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   stores its length in an int32_t. Do we have to think about
536350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   this now that we're using UText? Probably not, since the length
536450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   in UChar32s is still an int32_t.
5365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (*pCounter)++;
5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(*pCounter > 0);
536750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
5368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    U_ASSERT(*pCounter == maxCount || maxCount == -1);
5369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*pCounter >= minCount) {
5372c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, fp->fPatIdx, status);
5373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx = opValue + 4;    // Loop back.
5375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
537750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_INIT_NG:
5379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Initialize a non-greedy loop
5381c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
5382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[opValue] = 0;       //  Set the loop counter variable to zero
538350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Pick up the three extra operands that CTR_INIT has, and
5385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    skip the pattern location counter past
538650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 3;
5388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
538950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
539050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
5391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minCount>=0);
5392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(maxCount>=minCount || maxCount==-1);
5393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(loopLoc>fp->fPatIdx);
539450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (minCount == 0) {
5396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (maxCount != 0) {
5397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fp = StateSave(fp, fp->fPatIdx, status);
5398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
5400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
540350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_LOOP_NG:
5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Non-greedy {min, max} loops
5407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
540850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
5409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
541050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
541150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
541250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
541350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Increment the counter.  Note: we DIDN'T worry about counter
5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   overflow, since the data comes from UnicodeStrings, which
541550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   stores its length in an int32_t. Do we have to think about
541650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   this now that we're using UText? Probably not, since the length
541750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   in UChar32s is still an int32_t.
5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (*pCounter)++;
5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(*pCounter > 0);
542050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
542150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((uint64_t)*pCounter >= (uint32_t)maxCount) {
5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // The loop has matched the maximum permitted number of times.
5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Break out of here with no action.  Matching will
5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   continue with the following pattern.
5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    U_ASSERT(*pCounter == maxCount || maxCount == -1);
5426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
542850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*pCounter < minCount) {
5430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We haven't met the minimum number of matches yet.
5431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Loop back for another one.
5432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue + 4;    // Loop back.
5433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We do have the minimum number of matches.
5435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Fall into the following pattern, but first do
5436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   a state save to the top of the loop, so that a failure
5437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   in the following pattern will try another iteration of the loop.
5438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, opValue + 4, status);
5439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
544250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STO_SP:
5444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fData[opValue] = fStack->size();
5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
544750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LD_SP:
5449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
545150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(newStackSize <= fStack->size());
545350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
545450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (newFP == (int64_t *)fp) {
5455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (i=0; i<fFrameSize; i++) {
545950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    newFP[i] = ((int64_t *)fp)[i];
5460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp = (REStackFrame *)newFP;
5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fStack->setSize(newStackSize);
5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
546550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKREF:
5467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKREF_I:
5468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5469c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue < fFrameSize);
547050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupStartIdx = fp->fExtra[opValue];
547150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupEndIdx   = fp->fExtra[opValue+1];
5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(groupStartIdx <= groupEndIdx);
547350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t len = groupEndIdx-groupStartIdx;
5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (groupStartIdx < 0) {
5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // This capture group has not participated in the match thus far,
5476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (len == 0) {
5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        //   The capture group match was of an empty string.
5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        //   Verified by testing:  Perl matches succeed in this case, so
5482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        //   we do too.
5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool  haveMatch = FALSE;
5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx + len <= fActiveLimit) {
5488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (opType == URX_BACKREF) {
548950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (u_strncmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, (int32_t)len) == 0) {
5490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            haveMatch = TRUE;
5491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (u_strncasecmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx,
549450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  (int32_t)len, U_FOLD_CASE_DEFAULT) == 0) {
5495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            haveMatch = TRUE;
5496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
5499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // TODO: probably need to do a partial string comparison, and only
5500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //       set HitEnd if the available input matched.  Ticket #6074
5501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (haveMatch) {
5504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fInputIdx += len;     // Match.  Advance current input position.
5505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
5507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
551050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STO_INP_LOC:
5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[opValue] = fp->fInputIdx;
5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
551750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMPX:
5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
552050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 1;
5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
5523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
552450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t savedInputIdx = (int32_t)fp->fExtra[dataLoc];
5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(savedInputIdx <= fp->fInputIdx);
5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (savedInputIdx < fp->fInputIdx) {
5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue;                               // JMP
5528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
552950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
5530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
553350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LA_START:
5535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Entering a lookahead block.
5537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save Stack Ptr, Input Pos.
5538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue]   = fStack->size();
5540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+1] = fp->fInputIdx;
5541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveStart     = fLookStart;          // Set the match region change for
5542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit     = fLookLimit;          //   transparent bounds.
5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
554550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LA_END:
5547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Leaving a look-ahead block.
5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackSize = fStack->size();
555250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(stackSize >= newStackSize);
5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (stackSize > newStackSize) {
5555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Copy the current top frame back to the new (cut back) top frame.
5556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   This makes the capture groups from within the look-ahead
5557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   expression available.
555850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
5559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t i;
5560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for (i=0; i<fFrameSize; i++) {
556150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newFP[i] = ((int64_t *)fp)[i];
5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp = (REStackFrame *)newFP;
5564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fStack->setSize(newStackSize);
5565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = fData[opValue+1];
556750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Restore the active region bounds in the input string; they may have
5569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //    been changed because of transparent bounds on a Region.
5570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveStart = fRegionStart;
5571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = fRegionLimit;
5572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
557450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_ONECHAR_I:
5576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fActiveLimit) {
557750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
5578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
5580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
5584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
558550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
558650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #ifdef REGEX_SMART_BACKTRACKING
558750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx > backSearchIndex && fStack->size() > fFrameSize) {
558850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
558950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
559050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UBool success = FALSE;
559150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t reverseIndex = fp->fInputIdx;
559250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c;
559350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while (reverseIndex > backSearchIndex) {
559450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
559550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
559650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = TRUE;
559750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
559850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else if (c == U_SENTINEL) {
559950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
560050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
560150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
560250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (success) {
560350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = FALSE;
560450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
560550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = reverseIndex;
560650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex) {
560750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fp = StateSave(fp, fp->fPatIdx, status);
560850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
560950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fPatIdx++; // Skip the LOOP_C, we just did that
561050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
561150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
561250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
561350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
561450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            #endif
561550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5616c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
561850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STRING_I:
5620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input against a literal string.
5622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Strings require two slots in the compiled pattern, one for the
5623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   offset to the string text, and one for the length.
562427f654740f2a26ad62a5c155af9199af9e69b889claireho                const UCaseProps *csp = ucase_getSingleton();
562527f654740f2a26ad62a5c155af9199af9e69b889claireho                {
562650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t stringStartIdx, stringLen;
562750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    stringStartIdx = opValue;
5628b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
562950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    op      = (int32_t)pat[fp->fPatIdx];
563050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;
563150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opType  = URX_TYPE(op);
563250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opValue = URX_VAL(op);
563350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(opType == URX_STRING_LEN);
563450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    stringLen = opValue;
5635b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
563650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *patternChars = litText+stringStartIdx;
563750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *patternEnd = patternChars+stringLen;
5638b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
563950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *foldChars = NULL;
564050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t foldOffset, foldLength;
564150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c;
56420fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                    UBool c_is_valid = FALSE;
5643b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
564450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #ifdef REGEX_SMART_BACKTRACKING
564550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t originalInputIdx = fp->fInputIdx;
564650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    #endif
564750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UBool success = TRUE;
5648b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
564950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    foldOffset = foldLength = 0;
565050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
565150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    while (patternChars < patternEnd && success) {
56520fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                        if (fp->fInputIdx < fActiveLimit) {  // don't read past end of string
56530fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                            if(foldOffset < foldLength) {
56540fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                U16_NEXT_UNSAFE(foldChars, foldOffset, c);
56550fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                c_is_valid = TRUE;
56560fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                            } else {
56570fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                // test pre-condition of U16_NEXT: i < length
56580fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                U_ASSERT(fp->fInputIdx < fActiveLimit);
56590fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
56600fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                c_is_valid = TRUE;
56610fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
56620fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                if(foldLength >= 0) {
56630fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                    if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
56640fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                        foldOffset = 0;
56650fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                        U16_NEXT_UNSAFE(foldChars, foldOffset, c);
56660fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                    } else {
56670fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                        c = foldLength;
56680fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                        foldLength = foldOffset; // to avoid reading chars from the folding buffer
56690fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                                    }
567050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
567150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
56720fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                        } else {
5673b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                          c_is_valid = FALSE;
567450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
5675b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
56760fa67b93b831c6636ca18b152a1b1b14cc99b034claireho                        if (fp->fInputIdx <= fActiveLimit && c_is_valid) {
567750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (U_IS_BMP(c)) {
567850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                success = (*patternChars == c);
567950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars += 1;
568050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            } else if (patternChars+1 < patternEnd) {
568150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                success = (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c));
568250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars += 2;
568350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
568450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        } else {
568550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            success = FALSE;
568650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;          //   TODO:  See ticket 6074
568750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
568850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
5689b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
569050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (!success) {
569150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        #ifdef REGEX_SMART_BACKTRACKING
569250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (fp->fInputIdx > backSearchIndex && fStack->size()) {
569350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            REStackFrame *prevFrame = (REStackFrame *)fStack->peekFrame(fFrameSize);
569450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            if (URX_LOOP_C == URX_TYPE(pat[prevFrame->fPatIdx]) && fp->fInputIdx <= prevFrame->fInputIdx) {
569550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // Reset to last start point
569650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                int64_t reverseIndex = originalInputIdx;
569750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                patternChars = litText+stringStartIdx;
5698b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
569950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // Search backwards for a possible start
570050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                do {
570150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    U16_PREV(inputBuf, backSearchIndex, reverseIndex, c);
570250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    foldLength = ucase_toFullFolding(csp, c, &foldChars, U_FOLD_CASE_DEFAULT);
570350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if(foldLength >= 0) {
570450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        if(foldLength <= UCASE_MAX_STRING_LENGTH) {   // !!!: Does not correctly handle chars that fold to 0-length strings
570550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            foldOffset = 0;
570650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            U16_NEXT_UNSAFE(foldChars, foldOffset, c);
570750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        } else {
570850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            c = foldLength;
570950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                            foldLength = foldOffset; // to avoid reading chars from the folding buffer
571050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        }
571150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
5712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
571350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if ((U_IS_BMP(c) && *patternChars == c) ||
571450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                           (*patternChars == U16_LEAD(c) && *(patternChars+1) == U16_TRAIL(c))) {
571550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        success = TRUE;
571650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        break;
571750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
571850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                } while (reverseIndex > backSearchIndex);
5719b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
572050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                // And try again
572150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                if (success) {
572250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
572350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp->fInputIdx = reverseIndex;
572450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    if (fp->fInputIdx > backSearchIndex) {
572550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                        fp = StateSave(fp, fp->fPatIdx, status);
572650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    }
572750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    fp->fPatIdx++; // Skip the LOOP_C, we just did that
572850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    break;
572950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                }
573050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
573150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
573250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        #endif
573350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5735c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
573850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_START:
5740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Entering a look-behind block.
5742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save Stack Ptr, Input Pos.
5743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   TODO:  implement transparent bounds.  Ticket #6067
5744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue]   = fStack->size();
5746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+1] = fp->fInputIdx;
5747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Init the variable containing the start index for attempted matches.
5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+2] = -1;
5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save input string length, then reset to pin any matches to end at
5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   the current position.
5751c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fData[opValue+3] = fActiveLimit;
5752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit     = fp->fInputIdx;
5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
575550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
575650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_CONT:
5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Positive Look-Behind, at top of loop checking for matches of LB expression
5760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    at all possible input starting positions.
576150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch the min and max possible match lengths.  They are the operands
5763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   of this op in the pattern.
576450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML = (int32_t)pat[fp->fPatIdx++];
576550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML <= maxML);
5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML >= 0);
576850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch (from data) the last input index where a match was attempted.
5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
577150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
5772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0) {
5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // First time through loop.
5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *lbStartIdx = fp->fInputIdx - minML;
5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 2nd through nth time through the loop.
5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Back up start position for match by one.
5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*lbStartIdx == 0) {
577950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
578450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
5786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have tried all potential match starting points without
5787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  getting a match.  Backtrack out, and out of the
5788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Look Behind altogether.
5789c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
579050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
5791c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U_ASSERT(restoreInputLen >= fActiveLimit);
579250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
5793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fActiveLimit = restoreInputLen;
5794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
579650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
5798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //      (successful match will fall off the end of the loop.)
5799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-3, status);
5800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx =  *lbStartIdx;
5801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
580350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_END:
5805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // End of a look-behind block, after a successful match.
5806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx != fActiveLimit) {
5809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  The look-behind expression matched, but the match did not
5810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //    extend all the way to the point that we are looking behind from.
5811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  FAIL out of here, which will take us back to the LB_CONT, which
5812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     will retry the match starting at another position or fail
5813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     the look-behind altogether, whichever is appropriate.
5814c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
581750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Look-behind match is good.  Restore the original input string length,
5819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   which had been truncated to pin the end of the lookbehind match to the
5820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   position being looked-behind.
582150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
5822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(originalInputLen >= fActiveLimit);
582350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
5824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = originalInputLen;
5825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
582750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
582850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LBN_CONT:
5830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Negative Look-Behind, at top of loop checking for matches of LB expression
5832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    at all possible input starting positions.
583350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch the extra parameters of this op.
583550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
583650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
583750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
583850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continueLoc = URX_VAL(continueLoc);
5839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML <= maxML);
5840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML >= 0);
5841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(continueLoc > fp->fPatIdx);
584250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch (from data) the last input index where a match was attempted.
5844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
584550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
5846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0) {
5847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // First time through loop.
5848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *lbStartIdx = fp->fInputIdx - minML;
5849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 2nd through nth time through the loop.
5851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Back up start position for match by one.
5852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*lbStartIdx == 0) {
5853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (*lbStartIdx)--;   // Because U16_BACK is unsafe starting at 0.
5854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
5856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
585850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
5860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have tried all potential match starting points without
5861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  getting a match, which means that the negative lookbehind as
5862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  a whole has succeeded.  Jump forward to the continue location
586350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
5864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U_ASSERT(restoreInputLen >= fActiveLimit);
586550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
5866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fActiveLimit = restoreInputLen;
5867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = continueLoc;
5868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
587050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
5872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //      (successful match will cause a FAIL out of the loop altogether.)
5873c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-4, status);
5874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx =  *lbStartIdx;
5875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
587750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LBN_END:
5879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // End of a negative look-behind block, after a successful match.
5880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx != fActiveLimit) {
5883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  The look-behind expression matched, but the match did not
5884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //    extend all the way to the point that we are looking behind from.
5885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  FAIL out of here, which will take us back to the LB_CONT, which
5886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     will retry the match starting at another position or succeed
5887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     the look-behind altogether, whichever is appropriate.
5888c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
589150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Look-behind expression matched, which means look-behind test as
5893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   a whole Fails
5894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   Restore the original input string length, which had been truncated
5896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   in order to pin the end of the lookbehind match
5897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   to the position being looked-behind.
589850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
5899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(originalInputLen >= fActiveLimit);
590050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
5901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = originalInputLen;
590250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Restore original stack position, discarding any state saved
5904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   by the successful pattern match.
5905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
590650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(fStack->size() > newStackSize);
5908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fStack->setSize(newStackSize);
5909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  FAIL, which will take control back to someplace
5911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  prior to entering the look-behind test.
5912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
591550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
591650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_SR_I:
5918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Loop Initialization for the optimized implementation of
5919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //     [some character set]*
5920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   This op scans through all matching input.
5921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
5922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < sets->size());
5924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
5925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
592650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Loop through input, until either the input is exhausted or
5928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   we reach a character that is not a member of the set.
592950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t ix = (int32_t)fp->fInputIdx;
5930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (;;) {
5931c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (ix >= fActiveLimit) {
5932c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
5933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar32   c;
5936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U16_NEXT(inputBuf, ix, fActiveLimit, c);
5937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (c<256) {
5938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (s8->contains(c) == FALSE) {
5939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            U16_BACK_1(inputBuf, 0, ix);
5940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (s->contains(c) == FALSE) {
5944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            U16_BACK_1(inputBuf, 0, ix);
5945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
594950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If there were no matching characters, skip over the loop altogether.
5951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   The loop doesn't run at all, a * op always succeeds.
5952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (ix == fp->fInputIdx) {
5953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
5954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
595650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
5958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   must follow.  Its operand is the stack location
5959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   that holds the starting input index for the match of this [set]*
596050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
5961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
5962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackLoc = URX_VAL(loopcOp);
5963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
5964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[stackLoc] = fp->fInputIdx;
596550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
596650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fInputIdx;
596750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
5968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = ix;
596950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save State to the URX_LOOP_C op that follows this one,
5971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   so that match failures in the following code will return to there.
5972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
5973c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx, status);
5974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
5975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
597750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
597850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
5979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_DOT_I:
5980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Loop Initialization for the optimized implementation of .*
5981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   This op scans through all remaining input.
5982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
5983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Loop through input until the input is exhausted (we reach an end-of-line)
5985c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // In DOTALL mode, we can just go straight to the end of the input.
5986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t ix;
5987c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if ((opValue & 1) == 1) {
5988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Dot-matches-All mode.  Jump straight to the end of the string.
598950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = (int32_t)fActiveLimit;
5990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // NOT DOT ALL mode.  Line endings do not match '.'
5993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Scan forward until a line ending or end of input.
599450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = (int32_t)fp->fInputIdx;
5995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (;;) {
5996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (ix >= fActiveLimit) {
5997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            fHitEnd = TRUE;
5998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
6000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UChar32   c;
6001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        U16_NEXT(inputBuf, ix, fActiveLimit, c);   // c = inputBuf[ix++]
600227f654740f2a26ad62a5c155af9199af9e69b889claireho                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
600327f654740f2a26ad62a5c155af9199af9e69b889claireho                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
600427f654740f2a26ad62a5c155af9199af9e69b889claireho                                (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
600527f654740f2a26ad62a5c155af9199af9e69b889claireho                                   ((c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029))) {
6006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                //  char is a line ending.  Put the input pos back to the
6007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                //    line ending char, and exit the scanning loop.
6008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                U16_BACK_1(inputBuf, 0, ix);
6009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
6010c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
6011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
6012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
601450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If there were no matching characters, skip over the loop altogether.
6016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   The loop doesn't run at all, a * op always succeeds.
6017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (ix == fp->fInputIdx) {
6018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
6019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
6020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
602150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
6023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   must follow.  Its operand is the stack location
6024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   that holds the starting input index for the match of this .*
602550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
6026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
6027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackLoc = URX_VAL(loopcOp);
6028c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
6029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[stackLoc] = fp->fInputIdx;
603050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #ifdef REGEX_SMART_BACKTRACKING
603150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fInputIdx;
603250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                #endif
6033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = ix;
603450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save State to the URX_LOOP_C op that follows this one,
6036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   so that match failures in the following code will return to there.
6037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
6038c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx, status);
6039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
6040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
604250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
604350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_C:
6045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
6046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue<fFrameSize);
604750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = (int32_t)fp->fExtra[opValue];
604850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(backSearchIndex <= fp->fInputIdx);
604950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (backSearchIndex == fp->fInputIdx) {
6050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We've backed up the input idx to the point that the loop started.
6051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // The loop is done.  Leave here without saving state.
6052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  Subsequent failures won't come back here.
6053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
6054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
6055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Set up for the next iteration of the loop, with input index
6056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   backed up by one from the last time through,
6057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   and a state save to this instruction in case the following code fails again.
6058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   (We're going backwards because this loop emulates stack unwinding, not
6059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    the initial scan forward.)
6060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(fp->fInputIdx > 0);
606150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 prevC;
606250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit?
606350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
606450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (prevC == 0x0a &&
606550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx > backSearchIndex &&
6066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    inputBuf[fp->fInputIdx-1] == 0x0d) {
606750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
6068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
6069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // .*, stepping back over CRLF pair.
607050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_BACK_1(inputBuf, 0, fp->fInputIdx);
6071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
6072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
607350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
607450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-1, status);
6076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
6077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
607850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
607950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
608050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default:
6082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Trouble.  The compiled pattern contains an entry with an
6083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //           unrecognized type tag.
6084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(FALSE);
6085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
608650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
6088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            isMatch = FALSE;
6089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
6090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerubreakFromLoop:
6094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fMatch = isMatch;
6095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isMatch) {
6096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fLastMatchEnd = fMatchEnd;
6097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fMatchStart   = startIdx;
6098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fMatchEnd     = fp->fInputIdx;
6099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fTraceDebug) {
6100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_RUN_DEBUG_PRINTF(("Match.  start=%d   end=%d\n\n", fMatchStart, fMatchEnd));
6101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
6103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else
6104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
6105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fTraceDebug) {
6106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            REGEX_RUN_DEBUG_PRINTF(("No match\n\n"));
6107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
6108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
610950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
6110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFrame = fp;                // The active stack frame when the engine stopped.
611150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Contains the capture group results that we need to
611250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    access later.
6113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return;
6115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
6116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher)
6119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
6121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
6122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
6123