/*
**************************************************************************
*   Copyright (C) 2002-2015 International Business Machines Corporation  *
*   and others. All rights reserved.                                     *
**************************************************************************
*/
//
//  file:  rematch.cpp
//
//         Contains the implementation of class RegexMatcher,
//         which is one of the main API classes for the ICU regular expression package.
//
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/regex.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h"
22103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h"
23103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvectr32.h"
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uvectr64.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regeximp.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "regexst.h"
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "regextxt.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "ucase.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #include <malloc.h>        // Needed for heapcheck testing
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Default limit for the size of the backtrack stack, to avoid system
//    failures caused by heap exhaustion.  Units are in 32 bit words, not bytes.
// This value puts ICU's limits higher than most other regexp implementations,
//    which use recursion rather than the heap, and take more storage per
//    backtrack point.
//
static const int32_t DEFAULT_BACKTRACK_STACK_CAPACITY = 8000000;
45c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
// Time limit counter constant.
//   Time limits for expression evaluation are in terms of quanta of work by
//   the engine, each of which is 10,000 state saves.
//   This constant sets the number of state saves per tick.
static const int32_t TIMER_INITIAL_VALUE = 10000;
51c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert// Test for any of the Unicode line terminating characters.
541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic inline UBool isLineTerminator(UChar32 c) {
551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (c & ~(0x0a | 0x0b | 0x0c | 0x0d | 0x85 | 0x2028 | 0x2029)) {
561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return false;
571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    return (c<=0x0d && c>=0x0a) || c==0x85 || c==0x2028 || c==0x2029;
591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert}
601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-----------------------------------------------------------------------------
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Constructor and Destructor
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-----------------------------------------------------------------------------
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(const RegexPattern *pat)  {
67c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fDeferredStatus = U_ZERO_ERROR;
68c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(fDeferredStatus);
69c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
70c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
71c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat==NULL) {
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
76c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern = pat;
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, fDeferredStatus);
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           uint32_t flags, UErrorCode &status) {
84c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(status);
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
88c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UParseError    pe;
89c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = fPatternOwned;
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText inputText = UTEXT_INITIALIZER;
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&inputText, &input, &status);
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(&inputText, status);
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fInputUniStrMaybeMutable = TRUE;
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher::RegexMatcher(UText *regexp, UText *input,
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           uint32_t flags, UErrorCode &status) {
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init(status);
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError    pe;
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPattern           = fPatternOwned;
114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init2(input, status);
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(const UnicodeString &regexp,
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           uint32_t flags, UErrorCode &status) {
120c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    init(status);
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UParseError    pe;
125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPattern           = fPatternOwned;
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRegexMatcher::RegexMatcher(UText *regexp,
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                           uint32_t flags, UErrorCode &status) {
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init(status);
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UParseError    pe;
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fPatternOwned      = RegexPattern::compile(regexp, flags, pe, status);
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = fPatternOwned;
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init2(RegexStaticSets::gStaticSets->fEmptyText, status);
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher::~RegexMatcher() {
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fStack;
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fData != fSmallData) {
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(fData);
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fData = NULL;
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fPatternOwned) {
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete fPatternOwned;
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPatternOwned = NULL;
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fPattern = NULL;
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInput) {
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fInput;
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputText) {
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(fInputText);
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fAltInputText) {
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(fAltInputText);
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #if UCONFIG_NO_BREAK_ITERATION==0
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete fWordBreakItr;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    #endif
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//   init()   common initialization for use by all constructors.
181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            Initialize all fields, get the object into a consistent state.
182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            This must be done even when the initial status shows an error,
183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            so that the object is initialized sufficiently well for the destructor
184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            to run safely.
185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid RegexMatcher::init(UErrorCode &status) {
187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPattern           = NULL;
188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fPatternOwned      = NULL;
189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrameSize         = 0;
190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRegionStart       = 0;
191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRegionLimit       = 0;
192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchorStart       = 0;
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchorLimit       = 0;
194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLookStart         = 0;
195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLookLimit         = 0;
196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fActiveStart       = 0;
197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fActiveLimit       = 0;
198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTransparentBounds = FALSE;
199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAnchoringBounds   = TRUE;
200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatch             = FALSE;
201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatchStart        = 0;
202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fMatchEnd          = 0;
203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fLastMatchEnd      = -1;
204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAppendPosition    = 0;
205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fHitEnd            = FALSE;
206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fRequireEnd        = FALSE;
207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fStack             = NULL;
208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrame             = NULL;
209c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTimeLimit         = 0;
210c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTime              = 0;
211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTickCounter       = 0;
212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fStackLimit        = DEFAULT_BACKTRACK_STACK_CAPACITY;
213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fCallbackFn        = NULL;
214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fCallbackContext   = NULL;
21527f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackFn      = NULL;
21627f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackContext = NULL;
217c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fTraceDebug        = FALSE;
218c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fDeferredStatus    = status;
219c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fData              = fSmallData;
220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fWordBreakItr      = NULL;
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    fStack             = NULL;
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputText         = NULL;
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAltInputText      = NULL;
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInput             = NULL;
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputLength       = 0;
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = FALSE;
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
233c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
235c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//  init2()   Common initialization for use by RegexMatcher constructors, part 2.
236c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//            This handles the common setup to be done after the Pattern is available.
237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::init2(UText *input, UErrorCode &status) {
239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(fSmallData[0]))) {
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fData = (int64_t *)uprv_malloc(fPattern->fDataSize * sizeof(int64_t));
246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (fData == NULL) {
247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    fStack = new UVector64(status);
253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (fStack == NULL) {
254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        status = fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
255103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        return;
256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    }
257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    reset(input);
259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    setStackLimit(DEFAULT_BACKTRACK_STACK_CAPACITY, status);
260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fDeferredStatus = status;
262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar BACKSLASH  = 0x5c;
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar DOLLARSIGN = 0x24;
2691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic const UChar LEFTBRACKET = 0x7b;
2701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic const UChar RIGHTBRACKET = 0x7d;
2711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    appendReplacement
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruRegexMatcher &RegexMatcher::appendReplacement(UnicodeString &dest,
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              const UnicodeString &replacement,
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UErrorCode &status) {
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_SUCCESS(status)) {
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText resultText = UTEXT_INITIALIZER;
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&resultText, &dest, &status);
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_SUCCESS(status)) {
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            appendReplacement(&resultText, &replacementText, status);
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_close(&resultText);
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&replacementText);
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    appendReplacement, UText mode
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::appendReplacement(UText *dest,
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                              UText *replacement,
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                              UErrorCode &status) {
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch == FALSE) {
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_REGEX_INVALID_STATE;
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy input string from the end of previous match to start of current match
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t  destLen = utext_nativeLength(dest);
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatchStart > fAppendPosition) {
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            destLen += utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
32027f654740f2a26ad62a5c155af9199af9e69b889claireho                                     (int32_t)(fMatchStart-fAppendPosition), &status);
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t len16;
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = (int32_t)(fMatchStart-fAppendPosition);
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = utext_extract(fInputText, fAppendPosition, fMatchStart, NULL, 0, &lengthStatus);
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
33027f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
33127f654740f2a26ad62a5c155af9199af9e69b889claireho                status = U_MEMORY_ALLOCATION_ERROR;
33227f654740f2a26ad62a5c155af9199af9e69b889claireho                return *this;
33327f654740f2a26ad62a5c155af9199af9e69b889claireho            }
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_extract(fInputText, fAppendPosition, fMatchStart, inputChars, len16+1, &status);
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            destLen += utext_replace(dest, destLen, destLen, inputChars, len16, &status);
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_free(inputChars);
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fAppendPosition = fMatchEnd;
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // scan the replacement text, looking for substitutions ($n) and \escapes.
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  TODO:  optimize this loop by efficiently scanning for '$' or '\',
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //         move entire ranges not containing substitutions.
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UTEXT_SETNATIVEINDEX(replacement, 0);
3461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    for (UChar32 c = UTEXT_NEXT32(replacement); U_SUCCESS(status) && c != U_SENTINEL;  c = UTEXT_NEXT32(replacement)) {
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c == BACKSLASH) {
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Backslash Escape.  Copy the following char out without further checks.
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                    Note:  Surrogate pairs don't need any special handling
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           The second half wont be a '$' or a '\', and
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           will move to the dest normally on the next
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                           loop iteration.
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_CURRENT32(replacement);
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == U_SENTINEL) {
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c==0x55/*U*/ || c==0x75/*u*/) {
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We have a \udddd or \Udddddddd escape sequence.
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t offset = 0;
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                struct URegexUTextUnescapeCharContext context = U_REGEX_UTEXT_UNESCAPE_CONTEXT(replacement);
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 escapedChar = u_unescapeAt(uregex_utext_unescape_charAt, &offset, INT32_MAX, &context);
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (escapedChar != (UChar32)0xFFFFFFFF) {
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_IS_BMP(escapedChar)) {
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar c16 = (UChar)escapedChar;
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar surrogate[2];
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        surrogate[0] = U16_LEAD(escapedChar);
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        surrogate[1] = U16_TRAIL(escapedChar);
37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (U_SUCCESS(status)) {
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // TODO:  Report errors for mal-formed \u escapes?
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //        As this is, the original sequence is output, which may be OK.
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (context.lastOffset == offset) {
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(replacement);
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else if (context.lastOffset != offset-1) {
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_moveIndex32(replacement, offset - context.lastOffset - 1);
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_NEXT32(replacement);
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Plain backslash escape.  Just put out the escaped character.
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_IS_BMP(c)) {
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar c16 = (UChar)c;
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar surrogate[2];
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    surrogate[0] = U16_LEAD(c);
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    surrogate[1] = U16_TRAIL(c);
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (U_SUCCESS(status)) {
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if (c != DOLLARSIGN) {
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Normal char, not a $.  Copy it out without further checks.
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_IS_BMP(c)) {
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar c16 = (UChar)c;
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                destLen += utext_replace(dest, destLen, destLen, &c16, 1, &status);
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar surrogate[2];
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                surrogate[0] = U16_LEAD(c);
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                surrogate[1] = U16_TRAIL(c);
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (U_SUCCESS(status)) {
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    destLen += utext_replace(dest, destLen, destLen, surrogate, 2, &status);
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
4121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            // We've got a $.  Pick up a capture group name or number if one follows.
            // Consume digits so long as the resulting group number <= the
            // number of capture groups in the pattern.
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t groupNum  = 0;
4171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            int32_t numDigits = 0;
4181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            UChar32 nextChar = utext_current32(replacement);
4191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            if (nextChar == LEFTBRACKET) {
4201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                // Scan for a Named Capture Group, ${name}.
4211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UnicodeString groupName;
4221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                utext_next32(replacement);
4231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                while(U_SUCCESS(status) && nextChar != RIGHTBRACKET) {
4241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    nextChar = utext_next32(replacement);
4251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (nextChar == U_SENTINEL) {
4261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
4271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    } else if ((nextChar >= 0x41 && nextChar <= 0x5a) ||       // A..Z
4281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                               (nextChar >= 0x61 && nextChar <= 0x7a) ||       // a..z
4291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                               (nextChar >= 0x31 && nextChar <= 0x39)) {       // 0..9
4301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        groupName.append(nextChar);
4311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    } else if (nextChar == RIGHTBRACKET) {
4321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        groupNum = uhash_geti(fPattern->fNamedCaptureMap, &groupName);
4331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (groupNum == 0) {
4341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
4351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
4361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    } else {
4371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        // Character was something other than a name char or a closing '}'
4381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
4391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
4421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            } else if (u_isdigit(nextChar)) {
4431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                // $n    Scan for a capture group number
4441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                int32_t numCaptureGroups = fPattern->fGroupMap->size();
4451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                for (;;) {
4461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    nextChar = UTEXT_CURRENT32(replacement);
4471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (nextChar == U_SENTINEL) {
4481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        break;
4491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
4501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (u_isdigit(nextChar) == FALSE) {
4511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        break;
4521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
4531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    int32_t nextDigitVal = u_charDigitValue(nextChar);
4541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (groupNum*10 + nextDigitVal > numCaptureGroups) {
4551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        // Don't consume the next digit if it makes the capture group number too big.
4561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (numDigits == 0) {
4571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            status = U_INDEX_OUTOFBOUNDS_ERROR;
4581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
4591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        break;
4601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
4611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    (void)UTEXT_NEXT32(replacement);
4621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    groupNum=groupNum*10 + nextDigitVal;
4631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    ++numDigits;
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            } else {
4661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                // $ not followed by capture group name or number.
4671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            if (U_SUCCESS(status)) {
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                destLen += appendGroup(groupNum, dest, status);
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }  // End of $ capture group handling
4741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }  // End of per-character loop through the replacement string.
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//    appendTail     Intended to be used in conjunction with appendReplacement()
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                   To the destination string, append everything following
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                   the last match position from the input string.
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                   Note:  Match ranges do not affect appendTail or appendReplacement
488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString &RegexMatcher::appendTail(UnicodeString &dest) {
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode status = U_ZERO_ERROR;
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &dest, &status);
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(status)) {
49627f654740f2a26ad62a5c155af9199af9e69b889claireho        appendTail(&resultText, status);
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&resultText);
49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   appendTail, UText mode
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
50627f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::appendTail(UText *dest, UErrorCode &status) {
50727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
50927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
51027f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
51127f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
51327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputLength > fAppendPosition) {
51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int64_t destLen = utext_nativeLength(dest);
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            utext_replace(dest, destLen, destLen, fInputText->chunkContents+fAppendPosition,
51927f654740f2a26ad62a5c155af9199af9e69b889claireho                          (int32_t)(fInputLength-fAppendPosition), &status);
52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t len16;
52250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = (int32_t)(fInputLength-fAppendPosition);
52450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                len16 = utext_extract(fInputText, fAppendPosition, fInputLength, NULL, 0, &status);
52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                status = U_ZERO_ERROR; // buffer overflow
52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
52950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16));
53027f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
53127f654740f2a26ad62a5c155af9199af9e69b889claireho                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
53227f654740f2a26ad62a5c155af9199af9e69b889claireho            } else {
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                utext_extract(fInputText, fAppendPosition, fInputLength, inputChars, len16, &status); // unterminated
53427f654740f2a26ad62a5c155af9199af9e69b889claireho                int64_t destLen = utext_nativeLength(dest);
53527f654740f2a26ad62a5c155af9199af9e69b889claireho                utext_replace(dest, destLen, destLen, inputChars, len16, &status);
53627f654740f2a26ad62a5c155af9199af9e69b889claireho                uprv_free(inputChars);
53727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
53850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   end
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t RegexMatcher::end(UErrorCode &err) const {
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return end(0, err);
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55427f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(UErrorCode &err) const {
55527f654740f2a26ad62a5c155af9199af9e69b889claireho    return end64(0, err);
55627f654740f2a26ad62a5c155af9199af9e69b889claireho}
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
55827f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::end64(int32_t group, UErrorCode &err) const {
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch == FALSE) {
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_REGEX_INVALID_STATE;
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (group < 0 || group > fPattern->fGroupMap->size()) {
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_INDEX_OUTOFBOUNDS_ERROR;
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t e = -1;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (group == 0) {
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        e = fMatchEnd;
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Get the position within the stack frame of the variables for
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    this capture group.
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(groupOffset < fPattern->fFrameSize);
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(groupOffset >= 0);
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        e = fFrame->fExtra[groupOffset + 1];
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
58227f654740f2a26ad62a5c155af9199af9e69b889claireho        return e;
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58527f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::end(int32_t group, UErrorCode &err) const {
58627f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)end64(group, err);
58727f654740f2a26ad62a5c155af9199af9e69b889claireho}
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//--------------------------------------------------------------------------------
590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//
591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//   findProgressInterrupt  This function is called once for each advance in the target
592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//                          string from the find() function, and calls the user progress callback
593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//                          function if there is one installed.
594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//
595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//         Return:  TRUE if the find operation is to be terminated.
596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//                  FALSE if the find operation is to continue running.
597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//
598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//--------------------------------------------------------------------------------
599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::findProgressInterrupt(int64_t pos, UErrorCode &status) {
600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (fFindProgressCallbackFn && !(*fFindProgressCallbackFn)(fFindProgressCallbackContext, pos)) {
601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        status = U_REGEX_STOPPED_BY_CALLER;
602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return TRUE;
603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return FALSE;
605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   find()
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool RegexMatcher::find() {
613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(fDeferredStatus)) {
614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return FALSE;
615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UErrorCode status = U_ZERO_ERROR;
617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    UBool result = find(status);
618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return result;
619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}
620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius
621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//--------------------------------------------------------------------------------
622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//
623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//   find()
624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//
625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius//--------------------------------------------------------------------------------
626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::find(UErrorCode &status) {
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Start at the position of the last match end.  (Will be zero if the
62850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matcher has been reset.)
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return FALSE;
632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        status = fDeferredStatus;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
63850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return findUsingChunk(status);
64050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
64250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t startPos = fMatchEnd;
643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (startPos==0) {
644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        startPos = fActiveStart;
645c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (fMatch) {
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Save the position of any previous successful match.
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fLastMatchEnd = fMatchEnd;
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fMatchStart == fMatchEnd) {
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Previous match had zero length.  Move start position up one position
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  to avoid sending find() into a loop on zero-length matches.
654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (startPos >= fActiveLimit) {
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fMatch = FALSE;
656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
660b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (void)UTEXT_NEXT32(fInputText);
66150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            startPos = UTEXT_GETNATIVEINDEX(fInputText);
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fLastMatchEnd >= 0) {
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // A previous find() failed to match.  Don't try again.
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   (without this test, a pattern with a zero-length match
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    could match again at the end of an input string.)
668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fHitEnd = TRUE;
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compute the position in the input string beyond which a match can not begin, because
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   the minimum length match would extend past the end of the input.
676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
677c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    //          Be aware of possible overflows if making changes here.
67850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t testStartLimit;
67950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_USES_U16(fInputText)) {
68050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        testStartLimit = fActiveLimit - fPattern->fMinMatchLen;
68150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos > testStartLimit) {
68250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fMatch = FALSE;
68350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fHitEnd = TRUE;
68450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
68550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
68650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        // We don't know exactly how long the minimum match length is in native characters.
688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        // Treat anything > 0 as 1.
689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        testStartLimit = fActiveLimit - (fPattern->fMinMatchLen > 0 ? 1 : 0);
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32  c;
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ASSERT(startPos >= 0);
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (fPattern->fStartType) {
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_NO_INFO:
697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No optimization was found.
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  Try a match at each input position.
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            MatchAt(startPos, FALSE, status);
701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if (U_FAILURE(status)) {
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (fMatch) {
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
70750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= testStartLimit) {
708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return FALSE;
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
71150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
712b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            (void)UTEXT_NEXT32(fInputText);
71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            startPos = UTEXT_GETNATIVEINDEX(fInputText);
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note that it's perfectly OK for a pattern to have a zero-length
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   match at the end of a string, so we must make sure that the loop
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   runs with startPos == testStartLimit the last time through.
717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if  (findProgressInterrupt(startPos, status))
71827f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_START:
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Matches are only possible at the start of the input string
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //   (pattern begins with ^ or \A)
725c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (startPos > fActiveStart) {
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fMatch = FALSE;
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        MatchAt(startPos, FALSE, status);
730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if (U_FAILURE(status)) {
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return fMatch;
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_SET:
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Match may start on any char from a pre-computed set.
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fPattern->fMinMatchLen > 0);
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (;;) {
742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                int64_t pos = startPos;
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                startPos = UTEXT_GETNATIVEINDEX(fInputText);
74550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // c will be -1 (U_SENTINEL) at end of text, in which case we
74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // skip this next block (so we don't have a negative array index)
74750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // and handle end of text in the following block.
74827f654740f2a26ad62a5c155af9199af9e69b889claireho                if (c >= 0 && ((c<256 && fPattern->fInitialChars8->contains(c)) ||
74927f654740f2a26ad62a5c155af9199af9e69b889claireho                              (c>=256 && fPattern->fInitialChars->contains(c)))) {
750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    MatchAt(pos, FALSE, status);
751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if (U_FAILURE(status)) {
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return FALSE;
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (fMatch) {
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return TRUE;
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, pos);
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
759f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if (startPos > testStartLimit) {
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fMatch = FALSE;
761c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
764f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if  (findProgressInterrupt(startPos, status))
76527f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_STRING:
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_CHAR:
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Match starts on exactly one char.
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fPattern->fMinMatchLen > 0);
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 theChar = fPattern->fInitialChar;
77650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, startPos);
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (;;) {
778f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                int64_t pos = startPos;
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
780f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                startPos = UTEXT_GETNATIVEINDEX(fInputText);
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c == theChar) {
782f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    MatchAt(pos, FALSE, status);
783f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if (U_FAILURE(status)) {
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return FALSE;
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (fMatch) {
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return TRUE;
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
78950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, pos);
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
791f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if (startPos > testStartLimit) {
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fMatch = FALSE;
793c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
796f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if  (findProgressInterrupt(startPos, status))
79727f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
79850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case START_LINE:
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        {
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32  c;
805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (startPos == fAnchorStart) {
806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                MatchAt(startPos, FALSE, status);
807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if (U_FAILURE(status)) {
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return FALSE;
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (fMatch) {
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
81350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
81450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
81550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                startPos = UTEXT_GETNATIVEINDEX(fInputText);
81650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
81750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
81850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_PREVIOUS32(fInputText);
81950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, startPos);
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fPattern->fFlags & UREGEX_UNIX_LINES) {
82350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
824c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (c == 0x0a) {
825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                            MatchAt(startPos, FALSE, status);
826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                            if (U_FAILURE(status)) {
827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return FALSE;
828c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            if (fMatch) {
830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                return TRUE;
831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
83250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            UTEXT_SETNATIVEINDEX(fInputText, startPos);
833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
83450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (startPos >= testStartLimit) {
835c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fMatch = FALSE;
836c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
837c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return FALSE;
838c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
83950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
84050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
841c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Note that it's perfectly OK for a pattern to have a zero-length
842c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   match at the end of a string, so we must make sure that the loop
84350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   runs with startPos == testStartLimit the last time through.
844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if  (findProgressInterrupt(startPos, status))
84527f654740f2a26ad62a5c155af9199af9e69b889claireho                        return FALSE;
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (;;) {
8491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (isLineTerminator(c)) {
8501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
8511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            (void)UTEXT_NEXT32(fInputText);
8521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            startPos = UTEXT_GETNATIVEINDEX(fInputText);
8531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
8541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        MatchAt(startPos, FALSE, status);
8551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (U_FAILURE(status)) {
8561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            return FALSE;
8571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
8581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (fMatch) {
8591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            return TRUE;
8601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
8611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        UTEXT_SETNATIVEINDEX(fInputText, startPos);
862c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
86350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (startPos >= testStartLimit) {
864c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fMatch = FALSE;
865c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
866c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        return FALSE;
867c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
86850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
86950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    startPos = UTEXT_GETNATIVEINDEX(fInputText);
870c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Note that it's perfectly OK for a pattern to have a zero-length
871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   match at the end of a string, so we must make sure that the loop
87250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   runs with startPos == testStartLimit the last time through.
873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if  (findProgressInterrupt(startPos, status))
87427f654740f2a26ad62a5c155af9199af9e69b889claireho                        return FALSE;
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT(FALSE);
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_ASSERT(FALSE);
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
88927f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::find(int64_t start, UErrorCode &status) {
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
897c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    this->reset();                        // Note:  Reset() is specified by Java Matcher documentation.
898c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                          //        This will reset the region to be the full input length.
89950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
90050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
90150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
90250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
90427f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeStart = start;
90527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fMatchEnd = nativeStart;
910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    return find(status);
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
91650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   findUsingChunk() -- like find(), but with the advance knowledge that the
91750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       entire string is available in the UText's chunk buffer.
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUBool RegexMatcher::findUsingChunk(UErrorCode &status) {
92150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Start at the position of the last match end.  (Will be zero if the
92250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   matcher has been reset.
92350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
92550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t startPos = (int32_t)fMatchEnd;
92650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (startPos==0) {
92750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        startPos = (int32_t)fActiveStart;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
929fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
93050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *inputBuf = fInputText->chunkContents;
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
93250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch) {
93350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Save the position of any previous successful match.
93450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLastMatchEnd = fMatchEnd;
935fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
93650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fMatchStart == fMatchEnd) {
93750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Previous match had zero length.  Move start position up one position
93850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  to avoid sending find() into a loop on zero-length matches.
93950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= fActiveLimit) {
94050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
94150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
94250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
94350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
94450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fInputLength);
94550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
94650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
94750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fLastMatchEnd >= 0) {
94850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // A previous find() failed to match.  Don't try again.
94950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   (without this test, a pattern with a zero-length match
95050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    could match again at the end of an input string.)
95150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fHitEnd = TRUE;
95250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
95350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
955fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
956fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
95750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Compute the position in the input string beyond which a match can not begin, because
95850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   the minimum length match would extend past the end of the input.
95950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //   Note:  some patterns that cannot match anything will have fMinMatchLength==Max Int.
96050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //          Be aware of possible overflows if making changes here.
961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    //   Note:  a match can begin at inputBuf + testLen; it is an inclusive limit.
96250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t testLen  = (int32_t)(fActiveLimit - fPattern->fMinMatchLen);
96350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (startPos > testLen) {
96450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatch = FALSE;
96550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
968fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
96950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32  c;
97050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(startPos >= 0);
971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
97250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    switch (fPattern->fStartType) {
97350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_NO_INFO:
974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No optimization was found.
97550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Try a match at each input position.
97650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            MatchChunkAt(startPos, FALSE, status);
978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if (U_FAILURE(status)) {
97950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
98050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fMatch) {
98250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
98350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (startPos >= testLen) {
98550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
98650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
98750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
98850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fActiveLimit);
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Note that it's perfectly OK for a pattern to have a zero-length
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   match at the end of a string, so we must make sure that the loop
99150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   runs with startPos == testLen the last time through.
992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if  (findProgressInterrupt(startPos, status))
99327f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
99450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
99550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_START:
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Matches are only possible at the start of the input string
99950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   (pattern begins with ^ or \A)
100050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos > fActiveStart) {
100150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fMatch = FALSE;
100250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
100350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        MatchChunkAt(startPos, FALSE, status);
1005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        if (U_FAILURE(status)) {
100650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
100850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return fMatch;
1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
101150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_SET:
101250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
101350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match may start on any char from a pre-computed set.
101450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(fPattern->fMinMatchLen > 0);
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
101650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t pos = startPos;
101750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
101827f654740f2a26ad62a5c155af9199af9e69b889claireho            if ((c<256 && fPattern->fInitialChars8->contains(c)) ||
101927f654740f2a26ad62a5c155af9199af9e69b889claireho                (c>=256 && fPattern->fInitialChars->contains(c))) {
1020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                MatchChunkAt(pos, FALSE, status);
1021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if (U_FAILURE(status)) {
102250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
102450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fMatch) {
102550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;
102650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
102750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if (startPos > testLen) {
102950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
103050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
103150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
103250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if  (findProgressInterrupt(startPos, status))
103427f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
103550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
103750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
1038fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
103950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_STRING:
104050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_CHAR:
104150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
104250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Match starts on exactly one char.
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(fPattern->fMinMatchLen > 0);
104450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 theChar = fPattern->fInitialChar;
104550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        for (;;) {
104650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t pos = startPos;
104750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, startPos, fActiveLimit, c);  // like c = inputBuf[startPos++];
104850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == theChar) {
1049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                MatchChunkAt(pos, FALSE, status);
1050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if (U_FAILURE(status)) {
105150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
105250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
105350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fMatch) {
105450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return TRUE;
105550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
105650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if (startPos > testLen) {
105850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fMatch = FALSE;
105950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
106050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
106150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
1062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if  (findProgressInterrupt(startPos, status))
106327f654740f2a26ad62a5c155af9199af9e69b889claireho                return FALSE;
106450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    U_ASSERT(FALSE);
1067fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
106850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    case START_LINE:
106950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
107050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32  c;
107150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (startPos == fAnchorStart) {
1072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            MatchChunkAt(startPos, FALSE, status);
1073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            if (U_FAILURE(status)) {
107450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return FALSE;
107550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
107650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fMatch) {
107750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return TRUE;
107850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
107950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_FWD_1(inputBuf, startPos, fActiveLimit);
108050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1081fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
108250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fPattern->fFlags & UREGEX_UNIX_LINES) {
108350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
108450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = inputBuf[startPos-1];
108550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a) {
1086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    MatchChunkAt(startPos, FALSE, status);
1087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if (U_FAILURE(status)) {
108850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
108950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
109050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fMatch) {
109150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return TRUE;
109250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
109350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
109450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testLen) {
109550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fMatch = FALSE;
109650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
109750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
109850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
109950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_FWD_1(inputBuf, startPos, fActiveLimit);
110050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Note that it's perfectly OK for a pattern to have a zero-length
110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   match at the end of a string, so we must make sure that the loop
110250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   runs with startPos == testLen the last time through.
1103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if  (findProgressInterrupt(startPos, status))
110427f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
110550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
110650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
110750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
110850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = inputBuf[startPos-1];
11091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
111050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) {
111150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        startPos++;
111250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
1113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    MatchChunkAt(startPos, FALSE, status);
1114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                    if (U_FAILURE(status)) {
111550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return FALSE;
111650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
111750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fMatch) {
111850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        return TRUE;
111950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
112050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
112150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (startPos >= testLen) {
112250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fMatch = FALSE;
112350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
112450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return FALSE;
112550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
112650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_FWD_1(inputBuf, startPos, fActiveLimit);
112750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Note that it's perfectly OK for a pattern to have a zero-length
112850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   match at the end of a string, so we must make sure that the loop
112950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   runs with startPos == testLen the last time through.
1130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius                if  (findProgressInterrupt(startPos, status))
113127f654740f2a26ad62a5c155af9199af9e69b889claireho                    return FALSE;
113250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
113350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
113650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    default:
113750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(FALSE);
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
114050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(FALSE);
114150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return FALSE;
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
114850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  group()
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
115150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(UErrorCode &status) const {
115250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return group(0, status);
115350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
115450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
115527f654740f2a26ad62a5c155af9199af9e69b889claireho//  Return immutable shallow clone
115627f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(UText *dest, int64_t &group_len, UErrorCode &status) const {
115727f654740f2a26ad62a5c155af9199af9e69b889claireho    return group(0, dest, group_len, status);
115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
115950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
116027f654740f2a26ad62a5c155af9199af9e69b889claireho//  Return immutable shallow clone
116127f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const {
116227f654740f2a26ad62a5c155af9199af9e69b889claireho    group_len = 0;
116327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
116427f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
116527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
116627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
116727f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
1168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (fMatch == FALSE) {
116927f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_REGEX_INVALID_STATE;
1170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
117127f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
117227f654740f2a26ad62a5c155af9199af9e69b889claireho    }
1173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) {
1175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
117627f654740f2a26ad62a5c155af9199af9e69b889claireho    }
1177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
117827f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t s, e;
117927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (groupNum == 0) {
118027f654740f2a26ad62a5c155af9199af9e69b889claireho        s = fMatchStart;
118127f654740f2a26ad62a5c155af9199af9e69b889claireho        e = fMatchEnd;
118227f654740f2a26ad62a5c155af9199af9e69b889claireho    } else {
118327f654740f2a26ad62a5c155af9199af9e69b889claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
118427f654740f2a26ad62a5c155af9199af9e69b889claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
118527f654740f2a26ad62a5c155af9199af9e69b889claireho        U_ASSERT(groupOffset >= 0);
118627f654740f2a26ad62a5c155af9199af9e69b889claireho        s = fFrame->fExtra[groupOffset];
118727f654740f2a26ad62a5c155af9199af9e69b889claireho        e = fFrame->fExtra[groupOffset+1];
118827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
118950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
119027f654740f2a26ad62a5c155af9199af9e69b889claireho    if (s < 0) {
119127f654740f2a26ad62a5c155af9199af9e69b889claireho        // A capture group wasn't part of the match
119227f654740f2a26ad62a5c155af9199af9e69b889claireho        return utext_clone(dest, fInputText, FALSE, TRUE, &status);
119327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
119427f654740f2a26ad62a5c155af9199af9e69b889claireho    U_ASSERT(s <= e);
119527f654740f2a26ad62a5c155af9199af9e69b889claireho    group_len = e - s;
1196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
119727f654740f2a26ad62a5c155af9199af9e69b889claireho    dest = utext_clone(dest, fInputText, FALSE, TRUE, &status);
119827f654740f2a26ad62a5c155af9199af9e69b889claireho    if (dest)
119927f654740f2a26ad62a5c155af9199af9e69b889claireho        UTEXT_SETNATIVEINDEX(dest, s);
120027f654740f2a26ad62a5c155af9199af9e69b889claireho    return dest;
120127f654740f2a26ad62a5c155af9199af9e69b889claireho}
120250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
120350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::group(int32_t groupNum, UErrorCode &status) const {
120450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString result;
12051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int64_t groupStart = start64(groupNum, status);
12061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int64_t groupEnd = end64(groupNum, status);
12071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(status) || groupStart == -1 || groupStart == groupEnd) {
120827f654740f2a26ad62a5c155af9199af9e69b889claireho        return result;
120927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
1210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // Get the group length using a utext_extract preflight.
12121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    //    UText is actually pretty efficient at this when underlying encoding is UTF-16.
12131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t length = utext_extract(fInputText, groupStart, groupEnd, NULL, 0, &status);
12141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (status != U_BUFFER_OVERFLOW_ERROR) {
12151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return result;
121650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    status = U_ZERO_ERROR;
12191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UChar *buf = result.getBuffer(length);
12201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (buf == NULL) {
12211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        status = U_MEMORY_ALLOCATION_ERROR;
122250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
12231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t extractLength = utext_extract(fInputText, groupStart, groupEnd, buf, length, &status);
12241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        result.releaseBuffer(extractLength);
12251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        U_ASSERT(length == extractLength);
122650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    return result;
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
12301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
123250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
123350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  appendGroup() -- currently internal only, appends a group to a UText rather
123450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                   than replacing its contents
123550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
123650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
123750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
123850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint64_t RegexMatcher::appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const {
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
124027f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
124427f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
124627f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t destLen = utext_nativeLength(dest);
1247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch == FALSE) {
124950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_STATE;
125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum < 0 || groupNum > fPattern->fGroupMap->size()) {
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t s, e;
125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (groupNum == 0) {
125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fMatchStart;
126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fMatchEnd;
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(groupNum-1);
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset >= 0);
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fFrame->fExtra[groupOffset];
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        e = fFrame->fExtra[groupOffset+1];
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (s < 0) {
1270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // A capture group wasn't part of the match
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_replace(dest, destLen, destLen, NULL, 0, &status);
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    U_ASSERT(s <= e);
1274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t deltaLen;
127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(e <= fInputLength);
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deltaLen = utext_replace(dest, destLen, destLen, fInputText->chunkContents+s, (int32_t)(e-s), &status);
127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16;
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_USES_U16(fInputText)) {
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = (int32_t)(e-s);
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode lengthStatus = U_ZERO_ERROR;
128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = utext_extract(fInputText, s, e, NULL, 0, &lengthStatus);
128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *groupChars = (UChar *)uprv_malloc(sizeof(UChar)*(len16+1));
128827f654740f2a26ad62a5c155af9199af9e69b889claireho        if (groupChars == NULL) {
128927f654740f2a26ad62a5c155af9199af9e69b889claireho            status = U_MEMORY_ALLOCATION_ERROR;
129027f654740f2a26ad62a5c155af9199af9e69b889claireho            return 0;
129127f654740f2a26ad62a5c155af9199af9e69b889claireho        }
129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fInputText, s, e, groupChars, len16+1, &status);
1293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        deltaLen = utext_replace(dest, destLen, destLen, groupChars, len16, &status);
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        uprv_free(groupChars);
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return deltaLen;
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  groupCount()
1305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::groupCount() const {
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fPattern->fGroupMap->size();
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hasAnchoringBounds()
1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasAnchoringBounds() const {
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fAnchoringBounds;
1318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hasTransparentBounds()
1324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hasTransparentBounds() const {
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fTransparentBounds;
1328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  hitEnd()
1335c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1336c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
133750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::hitEnd() const {
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fHitEnd;
1339c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1340c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  input()
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
134750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst UnicodeString &RegexMatcher::input() const {
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!fInput) {
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t len16;
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_USES_U16(fInputText)) {
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = (int32_t)fInputLength;
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            len16 = utext_extract(fInputText, 0, fInputLength, NULL, 0, &status);
135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR; // overflow, length status
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString *result = new UnicodeString(len16, 0, 0);
1358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *inputChars = result->getBuffer(len16);
136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_extract(fInputText, 0, fInputLength, inputChars, len16, &status); // unterminated warning
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        result->releaseBuffer(len16);
1362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (*(const UnicodeString **)&fInput) = result; // pointer assignment, rather than operator=
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *fInput;
136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  inputText()
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::inputText() const {
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fInputText;
137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  getInput() -- like inputText(), but makes a clone or copies into another UText
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
138427f654740f2a26ad62a5c155af9199af9e69b889clairehoUText *RegexMatcher::getInput (UText *dest, UErrorCode &status) const {
138527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
138627f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
138727f654740f2a26ad62a5c155af9199af9e69b889claireho    }
138827f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(fDeferredStatus)) {
138927f654740f2a26ad62a5c155af9199af9e69b889claireho        status = fDeferredStatus;
1390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return dest;
139127f654740f2a26ad62a5c155af9199af9e69b889claireho    }
1392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest) {
139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), fInputText->chunkContents, (int32_t)fInputLength, &status);
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t input16Len;
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_USES_U16(fInputText)) {
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                input16Len = (int32_t)fInputLength;
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                input16Len = utext_extract(fInputText, 0, fInputLength, NULL, 0, &lengthStatus); // buffer overflow error
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(input16Len));
140527f654740f2a26ad62a5c155af9199af9e69b889claireho            if (inputChars == NULL) {
140627f654740f2a26ad62a5c155af9199af9e69b889claireho                return dest;
140727f654740f2a26ad62a5c155af9199af9e69b889claireho            }
1408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_extract(fInputText, 0, fInputLength, inputChars, input16Len, &status); // not terminated warning
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_ZERO_ERROR;
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            utext_replace(dest, 0, utext_nativeLength(dest), inputChars, input16Len, &status);
1413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            uprv_free(inputChars);
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return dest;
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return utext_clone(NULL, fInputText, FALSE, TRUE, &status);
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut);
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool compat_SyncMutableUTextContents(UText *ut) {
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool retVal = FALSE;
1426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  In the following test, we're really only interested in whether the UText should switch
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  between heap and stack allocation.  If length hasn't changed, we won't, so the chunkContents
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  will still point to the correct data.
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (utext_nativeLength(ut) != ut->nativeIndexingLimit) {
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString *us=(UnicodeString *)ut->context;
1432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Update to the latest length.
143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // For example, (utext_nativeLength(ut) != ut->nativeIndexingLimit).
143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t newLength = us->length();
1436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Update the chunk description.
143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // The buffer may have switched between stack- and heap-based.
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkContents    = us->getBuffer();
144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkLength      = newLength;
144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->chunkNativeLimit = newLength;
144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ut->nativeIndexingLimit = newLength;
144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        retVal = TRUE;
144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return retVal;
144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  lookingAt()
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::lookingAt(UErrorCode &status) {
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = fDeferredStatus;
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else {
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resetPreserveRegion();
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)fActiveStart, FALSE, status);
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(fActiveStart, FALSE, status);
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148127f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::lookingAt(int64_t start, UErrorCode &status) {
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
1490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t nativeStart;
150427f654740f2a26ad62a5c155af9199af9e69b889claireho    nativeStart = start;
150527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
151050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)nativeStart, FALSE, status);
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(nativeStart, FALSE, status);
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//  matches()
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
152550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::matches(UErrorCode &status) {
152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    else {
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        resetPreserveRegion();
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1543c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)fActiveStart, TRUE, status);
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(fActiveStart, TRUE, status);
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
155327f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool RegexMatcher::matches(int64_t start, UErrorCode &status) {
155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
1562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (start < 0) {
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputUniStrMaybeMutable) {
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (compat_SyncMutableUTextContents(fInputText)) {
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t nativeStart;
157627f654740f2a26ad62a5c155af9199af9e69b889claireho    nativeStart = start;
157727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart < fActiveStart || nativeStart > fActiveLimit) {
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_INDEX_OUTOFBOUNDS_ERROR;
157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (UTEXT_FULL_TEXT_IN_CHUNK(fInputText, fInputLength)) {
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchChunkAt((int32_t)nativeStart, TRUE, status);
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        MatchAt(nativeStart, TRUE, status);
158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fMatch;
158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    pattern
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoconst RegexPattern &RegexMatcher::pattern() const {
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *fPattern;
1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    region
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
160827f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) {
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
161327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (regionStart>regionLimit || regionStart<0 || regionLimit<0) {
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
161727f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeStart = regionStart;
161827f654740f2a26ad62a5c155af9199af9e69b889claireho    int64_t nativeLimit = regionLimit;
161927f654740f2a26ad62a5c155af9199af9e69b889claireho    if (nativeStart > fInputLength || nativeLimit > fInputLength) {
162027f654740f2a26ad62a5c155af9199af9e69b889claireho      status = U_ILLEGAL_ARGUMENT_ERROR;
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
162327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (startIndex == -1)
162427f654740f2a26ad62a5c155af9199af9e69b889claireho      this->reset();
162527f654740f2a26ad62a5c155af9199af9e69b889claireho    else
1626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      resetPreserveRegion();
1627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionStart = nativeStart;
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionLimit = nativeLimit;
163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveStart = nativeStart;
163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveLimit = nativeLimit;
163227f654740f2a26ad62a5c155af9199af9e69b889claireho
163327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (startIndex != -1) {
163427f654740f2a26ad62a5c155af9199af9e69b889claireho      if (startIndex < fActiveStart || startIndex > fActiveLimit) {
163527f654740f2a26ad62a5c155af9199af9e69b889claireho          status = U_INDEX_OUTOFBOUNDS_ERROR;
163627f654740f2a26ad62a5c155af9199af9e69b889claireho      }
1637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius      fMatchEnd = startIndex;
163827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
163927f654740f2a26ad62a5c155af9199af9e69b889claireho
164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!fTransparentBounds) {
164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLookStart = nativeStart;
164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLookLimit = nativeLimit;
164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fAnchoringBounds) {
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fAnchorStart = nativeStart;
164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fAnchorLimit = nativeLimit;
164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
165127f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::region(int64_t start, int64_t limit, UErrorCode &status) {
165227f654740f2a26ad62a5c155af9199af9e69b889claireho  return region(start, limit, -1, status);
165327f654740f2a26ad62a5c155af9199af9e69b889claireho}
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
165750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    regionEnd
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
166050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionEnd() const {
166127f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)fRegionLimit;
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
166427f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionEnd64() const {
166527f654740f2a26ad62a5c155af9199af9e69b889claireho    return fRegionLimit;
166627f654740f2a26ad62a5c155af9199af9e69b889claireho}
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
167050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    regionStart
1671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
167350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::regionStart() const {
167427f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)fRegionStart;
167527f654740f2a26ad62a5c155af9199af9e69b889claireho}
167627f654740f2a26ad62a5c155af9199af9e69b889claireho
167727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::regionStart64() const {
167827f654740f2a26ad62a5c155af9199af9e69b889claireho    return fRegionStart;
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1682c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1683c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
168450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceAll
1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
168750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceAll(const UnicodeString &replacement, UErrorCode &status) {
168850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
168950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
169050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString resultString;
169127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
169227f654740f2a26ad62a5c155af9199af9e69b889claireho        return resultString;
169327f654740f2a26ad62a5c155af9199af9e69b889claireho    }
1694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
169550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
169650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &resultString, &status);
1697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
169850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    replaceAll(&replacementText, &resultText, status);
1699c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
170050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&resultText);
170150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replacementText);
1702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
170350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return resultString;
1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
170650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1707c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
170850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceAll, UText mode
1709c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
171050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceAll(UText *replacement, UText *dest, UErrorCode &status) {
1711c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
171227f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1713c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = fDeferredStatus;
171627f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1717c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
171950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest == NULL) {
172050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString emptyString;
172150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty = UTEXT_INITIALIZER;
1722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
172350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&empty, &emptyString, &status);
172450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
172550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
1726c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
172750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
172850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_SUCCESS(status)) {
172950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        reset();
173050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (find()) {
173150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            appendReplacement(dest, replacement, status);
173250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (U_FAILURE(status)) {
173350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
173450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
173550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
173627f654740f2a26ad62a5c155af9199af9e69b889claireho        appendTail(dest, status);
173750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
173950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1740c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1741c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1742c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1743c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1744c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
174550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceFirst
1746c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
174850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUnicodeString RegexMatcher::replaceFirst(const UnicodeString &replacement, UErrorCode &status) {
174950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText replacementText = UTEXT_INITIALIZER;
175050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText resultText = UTEXT_INITIALIZER;
175150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString resultString;
1752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&replacementText, &replacement, &status);
175450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openUnicodeString(&resultText, &resultString, &status);
1755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    replaceFirst(&replacementText, &resultText, status);
1757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&resultText);
175950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&replacementText);
1760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
176150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return resultString;
1762c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1763c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1764c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
176550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    replaceFirst, UText mode
1766c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
176750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUText *RegexMatcher::replaceFirst(UText *replacement, UText *dest, UErrorCode &status) {
1768c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
176927f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1770c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1771c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(fDeferredStatus)) {
1772c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = fDeferredStatus;
177327f654740f2a26ad62a5c155af9199af9e69b889claireho        return dest;
1774c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
177550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1776c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    reset();
177750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (!find()) {
177827f654740f2a26ad62a5c155af9199af9e69b889claireho        return getInput(dest, status);
177950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
1780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
178150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (dest == NULL) {
178250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString emptyString;
178350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText empty = UTEXT_INITIALIZER;
1784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
178550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_openUnicodeString(&empty, &emptyString, &status);
178650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        dest = utext_clone(NULL, &empty, TRUE, FALSE, &status);
178750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(&empty);
1788c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
179050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    appendReplacement(dest, replacement, status);
179127f654740f2a26ad62a5c155af9199af9e69b889claireho    appendTail(dest, status);
1792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
179350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1794c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1796c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1797c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
179950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     requireEnd
1800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
180250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::requireEnd() const {
180350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fRequireEnd;
1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1808c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
180950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     reset
1810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
181250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset() {
181350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionStart    = 0;
181450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRegionLimit    = fInputLength;
181550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveStart    = 0;
181650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fActiveLimit    = fInputLength;
181750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorStart    = 0;
181850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorLimit    = fInputLength;
181950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookStart      = 0;
182050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookLimit      = fInputLength;
182150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    resetPreserveRegion();
182250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
182350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
182450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
182550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
182650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
182750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::resetPreserveRegion() {
182850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatchStart     = 0;
182950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatchEnd       = 0;
183050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLastMatchEnd   = -1;
183150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAppendPosition = 0;
183250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatch          = FALSE;
183350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fHitEnd         = FALSE;
183450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fRequireEnd     = FALSE;
183550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTime           = 0;
183650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter    = TIMER_INITIAL_VALUE;
183750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //resetStack(); // more expensive than it looks...
183850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
183950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
184150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(const UnicodeString &input) {
184250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputText = utext_openConstUnicodeString(fInputText, &input, &fDeferredStatus);
184327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (fPattern->fNeedsAltInput) {
184427f654740f2a26ad62a5c155af9199af9e69b889claireho        fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
184527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
18461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(fDeferredStatus)) {
18471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return *this;
18481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
184950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputLength = utext_nativeLength(fInputText);
1850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
185150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
185250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete fInput;
185350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInput = NULL;
185450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
185550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Do the following for any UnicodeString.
185650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  This is for compatibility for those clients who modify the input string "live" during regex operations.
1857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fInputUniStrMaybeMutable = TRUE;
1858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
185950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fWordBreakItr != NULL) {
186050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
186150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode status = U_ZERO_ERROR;
186250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fWordBreakItr->setText(fInputText, status);
186350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
186450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
186550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
186650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
186750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
186850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
186950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::reset(UText *input) {
187050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fInputText != input) {
187150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputText = utext_clone(fInputText, input, FALSE, TRUE, &fDeferredStatus);
187250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fPattern->fNeedsAltInput) fAltInputText = utext_clone(fAltInputText, fInputText, FALSE, TRUE, &fDeferredStatus);
18731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (U_FAILURE(fDeferredStatus)) {
18741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return *this;
18751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
187650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInputLength = utext_nativeLength(fInputText);
1877fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
187850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fInput;
187950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fInput = NULL;
1880fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
188150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fWordBreakItr != NULL) {
188250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
188350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode status = U_ZERO_ERROR;
188450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fWordBreakItr->setText(input, status);
188550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
188650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
188750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
188850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
188950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fInputUniStrMaybeMutable = FALSE;
189050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
189250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
189350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*RegexMatcher &RegexMatcher::reset(const UChar *) {
189550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fDeferredStatus = U_INTERNAL_PROGRAM_ERROR;
189650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
189750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}*/
189850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
189927f654740f2a26ad62a5c155af9199af9e69b889clairehoRegexMatcher &RegexMatcher::reset(int64_t position, UErrorCode &status) {
190050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
190150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
190250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
190350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();       // Reset also resets the region to be the entire string.
1904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
190527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (position < 0 || position > fActiveLimit) {
190650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
190750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return *this;
190850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
190927f654740f2a26ad62a5c155af9199af9e69b889claireho    fMatchEnd = position;
191050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
1911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
191327f654740f2a26ad62a5c155af9199af9e69b889claireho
19142e615e9896b12236afe0ff2695e8afc2ee73f961claireho//--------------------------------------------------------------------------------
19152e615e9896b12236afe0ff2695e8afc2ee73f961claireho//
19162e615e9896b12236afe0ff2695e8afc2ee73f961claireho//    refresh
19172e615e9896b12236afe0ff2695e8afc2ee73f961claireho//
19182e615e9896b12236afe0ff2695e8afc2ee73f961claireho//--------------------------------------------------------------------------------
19192e615e9896b12236afe0ff2695e8afc2ee73f961clairehoRegexMatcher &RegexMatcher::refreshInputText(UText *input, UErrorCode &status) {
19202e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (U_FAILURE(status)) {
19212e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19222e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19232e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (input == NULL) {
19242e615e9896b12236afe0ff2695e8afc2ee73f961claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
19252e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19262e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19272e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (utext_nativeLength(fInputText) != utext_nativeLength(input)) {
19282e615e9896b12236afe0ff2695e8afc2ee73f961claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
19292e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19302e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19312e615e9896b12236afe0ff2695e8afc2ee73f961claireho    int64_t  pos = utext_getNativeIndex(fInputText);
19322e615e9896b12236afe0ff2695e8afc2ee73f961claireho    //  Shallow read-only clone of the new UText into the existing input UText
19332e615e9896b12236afe0ff2695e8afc2ee73f961claireho    fInputText = utext_clone(fInputText, input, FALSE, TRUE, &status);
19342e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (U_FAILURE(status)) {
19352e615e9896b12236afe0ff2695e8afc2ee73f961claireho        return *this;
19362e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19372e615e9896b12236afe0ff2695e8afc2ee73f961claireho    utext_setNativeIndex(fInputText, pos);
1938c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
19392e615e9896b12236afe0ff2695e8afc2ee73f961claireho    if (fAltInputText != NULL) {
19402e615e9896b12236afe0ff2695e8afc2ee73f961claireho        pos = utext_getNativeIndex(fAltInputText);
19412e615e9896b12236afe0ff2695e8afc2ee73f961claireho        fAltInputText = utext_clone(fAltInputText, input, FALSE, TRUE, &status);
19422e615e9896b12236afe0ff2695e8afc2ee73f961claireho        if (U_FAILURE(status)) {
19432e615e9896b12236afe0ff2695e8afc2ee73f961claireho            return *this;
19442e615e9896b12236afe0ff2695e8afc2ee73f961claireho        }
19452e615e9896b12236afe0ff2695e8afc2ee73f961claireho        utext_setNativeIndex(fAltInputText, pos);
19462e615e9896b12236afe0ff2695e8afc2ee73f961claireho    }
19472e615e9896b12236afe0ff2695e8afc2ee73f961claireho    return *this;
19482e615e9896b12236afe0ff2695e8afc2ee73f961claireho}
1949b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
195050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
195150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
1953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
195450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    setTrace
1955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
1956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//--------------------------------------------------------------------------------
195750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTrace(UBool state) {
195850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTraceDebug = state;
1959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1960c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
196250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
19631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert/**
19641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *  UText, replace entire contents of the destination UText with a substring of the source UText.
19651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *
19661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *     @param src    The source UText
19671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *     @param dest   The destination UText. Must be writable.
19681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *                   May be NULL, in which case a new UText will be allocated.
19691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *     @param start  Start index of source substring.
19701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *     @param limit  Limit index of source substring.
19711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  *     @param status An error code.
19721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  */
19731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubertstatic UText *utext_extract_replace(UText *src, UText *dest, int64_t start, int64_t limit, UErrorCode *status) {
19741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(*status)) {
19751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return dest;
19761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
19771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (start == limit) {
19781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (dest) {
19791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            utext_replace(dest, 0, utext_nativeLength(dest), NULL, 0, status);
19801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return dest;
19811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        } else {
19821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return utext_openUChars(NULL, NULL, 0, status);
19831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
19841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
19851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t length = utext_extract(src, start, limit, NULL, 0, status);
19861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) {
19871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return dest;
19881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
19891b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    *status = U_ZERO_ERROR;
19901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    MaybeStackArray<UChar, 40> buffer;
19911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (length >= buffer.getCapacity()) {
19921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        UChar *newBuf = buffer.resize(length+1);   // Leave space for terminating Nul.
19931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if (newBuf == NULL) {
19941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            *status = U_MEMORY_ALLOCATION_ERROR;
19951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
19961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
19971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    utext_extract(src, start, limit, buffer.getAlias(), length+1, status);
19981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (dest) {
19991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        utext_replace(dest, 0, utext_nativeLength(dest), buffer.getAlias(), length, status);
20001b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return dest;
20011b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
20021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
20031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // Caller did not provide a prexisting UText.
20041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    // Open a new one, and have it adopt the text buffer storage.
20051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(*status)) {
20061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return NULL;
20071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
20081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t ownedLength = 0;
20091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UChar *ownedBuf = buffer.orphanOrClone(length+1, ownedLength);
20101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (ownedBuf == NULL) {
20111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        *status = U_MEMORY_ALLOCATION_ERROR;
20121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return NULL;
20131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
20141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UText *result = utext_openUChars(NULL, ownedBuf, length, status);
20151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if (U_FAILURE(*status)) {
20161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        uprv_free(ownedBuf);
20171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        return NULL;
20181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
20191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    result->providerProperties |= (1 << UTEXT_PROVIDER_OWNS_TEXT);
20201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    return result;
20211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert}
20221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
20231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
202450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
202650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   split
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
202850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//---------------------------------------------------------------------
202950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t  RegexMatcher::split(const UnicodeString &input,
203050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString    dest[],
203150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t          destCapacity,
203250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status)
203350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
203450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UText inputText = UTEXT_INITIALIZER;
203550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_openConstUnicodeString(&inputText, &input, &status);
203627f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
203727f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
203827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
203950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
204027f654740f2a26ad62a5c155af9199af9e69b889claireho    UText **destText = (UText **)uprv_malloc(sizeof(UText*)*destCapacity);
204127f654740f2a26ad62a5c155af9199af9e69b889claireho    if (destText == NULL) {
204227f654740f2a26ad62a5c155af9199af9e69b889claireho        status = U_MEMORY_ALLOCATION_ERROR;
204327f654740f2a26ad62a5c155af9199af9e69b889claireho        return 0;
204427f654740f2a26ad62a5c155af9199af9e69b889claireho    }
204550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
204650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i < destCapacity; i++) {
204750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        destText[i] = utext_openUnicodeString(NULL, &dest[i], &status);
204850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2049fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
205050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t fieldCount = split(&inputText, destText, destCapacity, status);
2051fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
205250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i < destCapacity; i++) {
205350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        utext_close(destText[i]);
205450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
205627f654740f2a26ad62a5c155af9199af9e69b889claireho    uprv_free(destText);
205750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    utext_close(&inputText);
205850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fieldCount;
205950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
206250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   split, UText mode
206350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
206450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t  RegexMatcher::split(UText *input,
206550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UText           *dest[],
206650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t          destCapacity,
206750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode      &status)
206850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho{
206950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
207050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Check arguements for validity
207150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
207250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
207350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
207450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    };
207550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
207650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (destCapacity < 1) {
207750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
207850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
207950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
208050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
208150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
208250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Reset for the input text
208350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
208450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset(input);
208550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t   nextOutputStringStart = 0;
208650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fActiveLimit == 0) {
208750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return 0;
208850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
208950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
209050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
209150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Loop through the input text, searching for the delimiter pattern
209250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
209350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
209450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t numCaptureGroups = fPattern->fGroupMap->size();
209550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; ; i++) {
209650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (i>=destCapacity-1) {
209750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // There is one or zero output string left.
209850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Fill the last output string with whatever is left from the input, then exit the loop.
209950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  ( i will be == destCapacity if we filled the output array while processing
210050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    capture groups of the delimiter expression, in which case we will discard the
210150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    last capture group saved in favor of the unprocessed remainder of the
210250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    input string.)
210350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            i = destCapacity-1;
210450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fActiveLimit > nextOutputStringStart) {
210550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
210650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (dest[i]) {
2107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
2108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                      input->chunkContents+nextOutputStringStart,
210927f654740f2a26ad62a5c155af9199af9e69b889claireho                                      (int32_t)(fActiveLimit-nextOutputStringStart), &status);
211050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
211150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UText remainingText = UTEXT_INITIALIZER;
2112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
211327f654740f2a26ad62a5c155af9199af9e69b889claireho                                         fActiveLimit-nextOutputStringStart, &status);
211450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
211550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_close(&remainingText);
211650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
211750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
211850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode lengthStatus = U_ZERO_ERROR;
2119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    int32_t remaining16Length =
212027f654740f2a26ad62a5c155af9199af9e69b889claireho                        utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
212150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
212227f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (remainingChars == NULL) {
212327f654740f2a26ad62a5c155af9199af9e69b889claireho                        status = U_MEMORY_ALLOCATION_ERROR;
212427f654740f2a26ad62a5c155af9199af9e69b889claireho                        break;
212527f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
212627f654740f2a26ad62a5c155af9199af9e69b889claireho
212750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
212850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (dest[i]) {
212950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
213050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
213150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UText remainingText = UTEXT_INITIALIZER;
213250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
213350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
213450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        utext_close(&remainingText);
213550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
2136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
213750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    uprv_free(remainingChars);
213850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
213950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
214050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
214150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
214250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (find()) {
214350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We found another delimiter.  Move everything from where we started looking
214450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  up until the start of the delimiter into the next output string.
214550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
214650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
2147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
2148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  input->chunkContents+nextOutputStringStart,
214927f654740f2a26ad62a5c155af9199af9e69b889claireho                                  (int32_t)(fMatchStart-nextOutputStringStart), &status);
215050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
215150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
2152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
215327f654740f2a26ad62a5c155af9199af9e69b889claireho                                      fMatchStart-nextOutputStringStart, &status);
215450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
215550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
215650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
215750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
215850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
215950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fMatchStart, NULL, 0, &lengthStatus);
216050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
216127f654740f2a26ad62a5c155af9199af9e69b889claireho                if (remainingChars == NULL) {
216227f654740f2a26ad62a5c155af9199af9e69b889claireho                    status = U_MEMORY_ALLOCATION_ERROR;
216327f654740f2a26ad62a5c155af9199af9e69b889claireho                    break;
216427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
216550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utext_extract(input, nextOutputStringStart, fMatchStart, remainingChars, remaining16Length+1, &status);
216650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
216750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
216850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
216950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
217050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
217150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
217250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
217350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
217550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uprv_free(remainingChars);
217650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
217750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            nextOutputStringStart = fMatchEnd;
217850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
217950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // If the delimiter pattern has capturing parentheses, the captured
218050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  text goes out into the next n destination strings.
218150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t groupNum;
218250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
2183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (i >= destCapacity-2) {
2184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // Never fill the last available output string with capture group text.
2185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // It will filled with the last field, the remainder of the
2186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    //  unsplit input text.
218750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
218850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
218950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                i++;
21901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                dest[i] = utext_extract_replace(fInputText, dest[i],
21911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                               start64(groupNum, status), end64(groupNum, status), &status);
219250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
219350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
219450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (nextOutputStringStart == fActiveLimit) {
2195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // The delimiter was at the end of the string.  We're done, but first
2196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // we output one last empty string, for the empty field following
2197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                //   the delimiter at the end of input.
2198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if (i+1 < destCapacity) {
2199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    ++i;
2200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if (dest[i] == NULL) {
2201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        dest[i] = utext_openUChars(NULL, NULL, 0, &status);
2202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    } else {
2203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        static UChar emptyString[] = {(UChar)0};
2204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        utext_replace(dest[i], 0, utext_nativeLength(dest[i]), emptyString, 0, &status);
2205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
220650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
2208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
2209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
221050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
221150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        else
221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // We ran off the end of the input while looking for the next delimiter.
221450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // All the remaining text goes into the current output string.
221550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (UTEXT_FULL_TEXT_IN_CHUNK(input, fInputLength)) {
221650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
2217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]),
2218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  input->chunkContents+nextOutputStringStart,
221927f654740f2a26ad62a5c155af9199af9e69b889claireho                                  (int32_t)(fActiveLimit-nextOutputStringStart), &status);
222050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
222150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
2222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    utext_openUChars(&remainingText, input->chunkContents+nextOutputStringStart,
222327f654740f2a26ad62a5c155af9199af9e69b889claireho                                     fActiveLimit-nextOutputStringStart, &status);
222450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
222550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
222650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
222750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
222850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode lengthStatus = U_ZERO_ERROR;
222950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t remaining16Length = utext_extract(input, nextOutputStringStart, fActiveLimit, NULL, 0, &lengthStatus);
223050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar *remainingChars = (UChar *)uprv_malloc(sizeof(UChar)*(remaining16Length+1));
223127f654740f2a26ad62a5c155af9199af9e69b889claireho                if (remainingChars == NULL) {
223227f654740f2a26ad62a5c155af9199af9e69b889claireho                    status = U_MEMORY_ALLOCATION_ERROR;
223327f654740f2a26ad62a5c155af9199af9e69b889claireho                    break;
223427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
2235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
223650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utext_extract(input, nextOutputStringStart, fActiveLimit, remainingChars, remaining16Length+1, &status);
223750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (dest[i]) {
223850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_replace(dest[i], 0, utext_nativeLength(dest[i]), remainingChars, remaining16Length, &status);
223950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
224050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UText remainingText = UTEXT_INITIALIZER;
224150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_openUChars(&remainingText, remainingChars, remaining16Length, &status);
224250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    dest[i] = utext_clone(NULL, &remainingText, TRUE, FALSE, &status);
224350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utext_close(&remainingText);
224450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
224650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                uprv_free(remainingChars);
224750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
224850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
224950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
225027f654740f2a26ad62a5c155af9199af9e69b889claireho        if (U_FAILURE(status)) {
225127f654740f2a26ad62a5c155af9199af9e69b889claireho            break;
225227f654740f2a26ad62a5c155af9199af9e69b889claireho        }
225327f654740f2a26ad62a5c155af9199af9e69b889claireho    }   // end of for loop
225450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return i+1;
225550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
225650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
225750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
225850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
225950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
226050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     start
226150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
226250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
226350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::start(UErrorCode &status) const {
226450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return start(0, status);
226550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
226650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
226727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(UErrorCode &status) const {
226827f654740f2a26ad62a5c155af9199af9e69b889claireho    return start64(0, status);
226927f654740f2a26ad62a5c155af9199af9e69b889claireho}
227050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
227150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
227250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
227350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     start(int32_t group, UErrorCode &status)
227450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
227550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
227627f654740f2a26ad62a5c155af9199af9e69b889claireho
227727f654740f2a26ad62a5c155af9199af9e69b889clairehoint64_t RegexMatcher::start64(int32_t group, UErrorCode &status) const {
227850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
227950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
228050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
228150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
228250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
228350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
228450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
228550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fMatch == FALSE) {
228650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_INVALID_STATE;
228750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
228850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
228950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (group < 0 || group > fPattern->fGroupMap->size()) {
229050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_INDEX_OUTOFBOUNDS_ERROR;
229150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return -1;
229250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
229350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t s;
229450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (group == 0) {
2295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        s = fMatchStart;
229650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
229750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t groupOffset = fPattern->fGroupMap->elementAti(group-1);
229850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset < fPattern->fFrameSize);
229950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U_ASSERT(groupOffset >= 0);
230050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        s = fFrame->fExtra[groupOffset];
230150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
230327f654740f2a26ad62a5c155af9199af9e69b889claireho    return s;
230450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
230550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
230727f654740f2a26ad62a5c155af9199af9e69b889clairehoint32_t RegexMatcher::start(int32_t group, UErrorCode &status) const {
230827f654740f2a26ad62a5c155af9199af9e69b889claireho    return (int32_t)start64(group, status);
230927f654740f2a26ad62a5c155af9199af9e69b889claireho}
231050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
231150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
231250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
231350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     useAnchoringBounds
231450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
231550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
231650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useAnchoringBounds(UBool b) {
231750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchoringBounds = b;
231850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorStart = (fAnchoringBounds ? fRegionStart : 0);
231950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fAnchorLimit = (fAnchoringBounds ? fRegionLimit : fInputLength);
232050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
232150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
232250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
232350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
232450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
232550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
232650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     useTransparentBounds
232750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
232850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
232950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoRegexMatcher &RegexMatcher::useTransparentBounds(UBool b) {
233050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTransparentBounds = b;
233150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookStart = (fTransparentBounds ? 0 : fRegionStart);
233250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fLookLimit = (fTransparentBounds ? fInputLength : fRegionLimit);
233350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return *this;
233450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
233550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
233650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
233750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
233850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setTimeLimit
233950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
234050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
234150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setTimeLimit(int32_t limit, UErrorCode &status) {
234250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
234350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
234450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
234550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
234650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
234750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
234850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
234950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit < 0) {
235050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
235150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
235250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
235350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTimeLimit = limit;
235450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
235550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
235750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
235850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
235950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getTimeLimit
236050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
236150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
236250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getTimeLimit() const {
236350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fTimeLimit;
236450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
236550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
236650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
236750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
236850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
236950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setStackLimit
237050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
237150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
237250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setStackLimit(int32_t limit, UErrorCode &status) {
237350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
237450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
237550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
237650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(fDeferredStatus)) {
237750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = fDeferredStatus;
237850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
237950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
238050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit < 0) {
238150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_ILLEGAL_ARGUMENT_ERROR;
238250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
238350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
238550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Reset the matcher.  This is needed here in case there is a current match
2386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //    whose final stack frame (containing the match results, pointed to by fFrame)
238750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //    would be lost by resizing to a smaller stack size.
238850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reset();
2389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
239050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (limit == 0) {
239150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Unlimited stack expansion
239250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fStack->setMaxCapacity(0);
239350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
239450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Change the units of the limit  from bytes to ints, and bump the size up
2395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        //   to be big enough to hold at least one stack frame for the pattern,
239650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   if it isn't there already.
239750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t adjustedLimit = limit / sizeof(int32_t);
239850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (adjustedLimit < fPattern->fFrameSize) {
239950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            adjustedLimit = fPattern->fFrameSize;
240050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
240150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fStack->setMaxCapacity(adjustedLimit);
240250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
240350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fStackLimit = limit;
240450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
240550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
240750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
240850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
240950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getStackLimit
241050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
241150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
241250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoint32_t RegexMatcher::getStackLimit() const {
241350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return fStackLimit;
241450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
241550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
241750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
241850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
241950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     setMatchCallback
242050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
242150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
242250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::setMatchCallback(URegexMatchCallback     *callback,
242350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    const void              *context,
242450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                    UErrorCode              &status) {
242550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
242650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
242750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
242850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fCallbackFn = callback;
242950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fCallbackContext = context;
243050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
243150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
243250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
243350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
243450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
243550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//     getMatchCallback
243650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
243750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
243850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::getMatchCallback(URegexMatchCallback   *&callback,
243950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  const void              *&context,
244050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                  UErrorCode              &status) {
244150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
244250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       return;
244350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
244450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    callback = fCallbackFn;
244550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    context  = fCallbackContext;
244650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
244750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
244850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
244927f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
245027f654740f2a26ad62a5c155af9199af9e69b889claireho//
//     setFindProgressCallback
245227f654740f2a26ad62a5c155af9199af9e69b889claireho//
245327f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
245427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::setFindProgressCallback(URegexFindProgressCallback      *callback,
245527f654740f2a26ad62a5c155af9199af9e69b889claireho                                                const void                      *context,
245627f654740f2a26ad62a5c155af9199af9e69b889claireho                                                UErrorCode                      &status) {
245727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
245827f654740f2a26ad62a5c155af9199af9e69b889claireho        return;
245927f654740f2a26ad62a5c155af9199af9e69b889claireho    }
246027f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackFn = callback;
246127f654740f2a26ad62a5c155af9199af9e69b889claireho    fFindProgressCallbackContext = context;
246227f654740f2a26ad62a5c155af9199af9e69b889claireho}
246327f654740f2a26ad62a5c155af9199af9e69b889claireho
246427f654740f2a26ad62a5c155af9199af9e69b889claireho
246527f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
246627f654740f2a26ad62a5c155af9199af9e69b889claireho//
//     getFindProgressCallback
246827f654740f2a26ad62a5c155af9199af9e69b889claireho//
246927f654740f2a26ad62a5c155af9199af9e69b889claireho//--------------------------------------------------------------------------------
247027f654740f2a26ad62a5c155af9199af9e69b889clairehovoid RegexMatcher::getFindProgressCallback(URegexFindProgressCallback    *&callback,
247127f654740f2a26ad62a5c155af9199af9e69b889claireho                                                const void                    *&context,
247227f654740f2a26ad62a5c155af9199af9e69b889claireho                                                UErrorCode                    &status) {
247327f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) {
247427f654740f2a26ad62a5c155af9199af9e69b889claireho       return;
247527f654740f2a26ad62a5c155af9199af9e69b889claireho    }
247627f654740f2a26ad62a5c155af9199af9e69b889claireho    callback = fFindProgressCallbackFn;
247727f654740f2a26ad62a5c155af9199af9e69b889claireho    context  = fFindProgressCallbackContext;
247827f654740f2a26ad62a5c155af9199af9e69b889claireho}
247927f654740f2a26ad62a5c155af9199af9e69b889claireho
248027f654740f2a26ad62a5c155af9199af9e69b889claireho
248150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================
248250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
248350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Code following this point in this file is the internal
248450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Match Engine Implementation.
248550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
248650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//================================================================================
248750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
248850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
248950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
249050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
249150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   resetStack
249250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//           Discard any previous contents of the state save stack, and initialize a
2493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius//           new stack frame to all -1.  The -1s are needed for capture group limits,
249450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//           where they indicate that a group has not yet matched anything.
249550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
249650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoREStackFrame *RegexMatcher::resetStack() {
249750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Discard any previous contents of the state save stack, and initialize a
249850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  new stack frame with all -1 data.  The -1s are needed for capture group limits,
249950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  where they indicate that a group has not yet matched anything.
250050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fStack->removeAllElements();
250150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
250250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REStackFrame *iFrame = (REStackFrame *)fStack->reserveBlock(fPattern->fFrameSize, fDeferredStatus);
250350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
250450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i=0; i<fPattern->fFrameSize-RESTACKFRAME_HDRCOUNT; i++) {
250550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        iFrame->fExtra[i] = -1;
250650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
250750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return iFrame;
250850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
250950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
251050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
251150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
251250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
251350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
2514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius//   isWordBoundary
251550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     in perl, "xab..cd..", \b is true at positions 0,3,5,7
251650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     For us,
251750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       If the current char is a combining mark,
251850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                          \b is FALSE.
251950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                       Else Scan backwards to the first non-combining char.
//                            We are at a boundary if this char and the original char are
252150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                               opposite in membership in \w set
252250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
252350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//          parameters:   pos   - the current position in the input buffer
252450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
252550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//              TODO:  double-check edge cases at region boundaries.
252650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
252750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
252850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isWordBoundary(int64_t pos) {
252950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool isBoundary = FALSE;
253050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool cIsWord    = FALSE;
2531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
253250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
253350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
253450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
253550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Determine whether char c at current position is a member of the word set of chars.
253650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If we're off the end of the string, behave as though we're not at a word char.
253750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UTEXT_SETNATIVEINDEX(fInputText, pos);
253850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32  c = UTEXT_CURRENT32(fInputText);
253950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
254050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Current char is a combining one.  Not a boundary.
254150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
254250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
254350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
254450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
254650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back up until we come to a non-combining char, determine whether
254750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  that char is a word char.
254850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool prevCIsWord = FALSE;
254950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
255050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (UTEXT_GETNATIVEINDEX(fInputText) <= fLookStart) {
255150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
255250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
255350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 prevChar = UTEXT_PREVIOUS32(fInputText);
255450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
255550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              || u_charType(prevChar) == U_FORMAT_CHAR)) {
255650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
255750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
255850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
255950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
256050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isBoundary = cIsWord ^ prevCIsWord;
256150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isBoundary;
256250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
256350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
256450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isChunkWordBoundary(int32_t pos) {
256550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool isBoundary = FALSE;
256650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool cIsWord    = FALSE;
2567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
256850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *inputBuf = fInputText->chunkContents;
2569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
257050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
257150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
257250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
257350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Determine whether char c at current position is a member of the word set of chars.
257450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // If we're off the end of the string, behave as though we're not at a word char.
257550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
257650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_GET(inputBuf, fLookStart, pos, fLookLimit, c);
257750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (u_hasBinaryProperty(c, UCHAR_GRAPHEME_EXTEND) || u_charType(c) == U_FORMAT_CHAR) {
257850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Current char is a combining one.  Not a boundary.
257950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
258050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
258150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        cIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(c);
258250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
258450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Back up until we come to a non-combining char, determine whether
258550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  that char is a word char.
258650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool prevCIsWord = FALSE;
258750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
258850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (pos <= fLookStart) {
258950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
259050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
259150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 prevChar;
259250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        U16_PREV(inputBuf, fLookStart, pos, prevChar);
259350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!(u_hasBinaryProperty(prevChar, UCHAR_GRAPHEME_EXTEND)
259450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              || u_charType(prevChar) == U_FORMAT_CHAR)) {
259550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            prevCIsWord = fPattern->fStaticSets[URX_ISWORD_SET]->contains(prevChar);
259650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
259750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
259850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
259950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isBoundary = cIsWord ^ prevCIsWord;
260050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isBoundary;
260150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
260250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
260350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
260450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
2605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius//   isUWordBoundary
260650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
260750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//         Test for a word boundary using RBBI word break.
260850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
260950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//          parameters:   pos   - the current position in the input buffer
261050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
261150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
261250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool RegexMatcher::isUWordBoundary(int64_t pos) {
261350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool       returnVal = FALSE;
261450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION==0
2615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
261650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // If we haven't yet created a break iterator for this matcher, do it now.
261750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fWordBreakItr == NULL) {
2618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fWordBreakItr =
261950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), fDeferredStatus);
262050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(fDeferredStatus)) {
262150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FALSE;
262250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
262350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fWordBreakItr->setText(fInputText, fDeferredStatus);
262450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
262550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
262650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (pos >= fLookLimit) {
262750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fHitEnd = TRUE;
262850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = TRUE;   // With Unicode word rules, only positions within the interior of "real"
262950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            //    words are not boundaries.  All non-word chars stand by themselves,
263050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            //    with word boundaries on both sides.
263150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
263250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (!UTEXT_USES_U16(fInputText)) {
263350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // !!!: Would like a better way to do this!
263450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UErrorCode status = U_ZERO_ERROR;
263550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            pos = utext_extract(fInputText, 0, pos, NULL, 0, &status);
263650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
263750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        returnVal = fWordBreakItr->isBoundary((int32_t)pos);
263850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
263950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
264050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return   returnVal;
264150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
264250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
264350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
264450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
264550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   IncrementTime     This function is called once each TIMER_INITIAL_VALUE state
264650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     saves. Increment the "time" counter, and call the
264750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     user callback function if there is one installed.
264850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
264950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     If the match operation needs to be aborted, either for a time-out
265050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     or because the user callback asked for it, just set an error status.
265150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                     The engine will pick that up and stop in its outer loop.
265250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
265350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
265450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::IncrementTime(UErrorCode &status) {
265550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter = TIMER_INITIAL_VALUE;
265650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTime++;
265750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fCallbackFn != NULL) {
265850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if ((*fCallbackFn)(fCallbackContext, fTime) == FALSE) {
265950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            status = U_REGEX_STOPPED_BY_CALLER;
266050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return;
266150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
266250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
266350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTimeLimit > 0 && fTime >= fTimeLimit) {
266450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_TIME_OUT;
266550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
266650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
266750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
266850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
266950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
267050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   StateSave
267150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Make a new stack frame, initialized as a copy of the current stack frame.
267250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Set the pattern index in the original stack frame from the operand value
267350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       in the opcode.  Execution of the engine continues with the state in
267450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       the newly created stack frame
267550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
267650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       Note that reserveBlock() may grow the stack, resulting in the
267750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       whole thing being relocated in memory.
267850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
267950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Parameters:
2680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius//       fp           The top frame pointer when called.  At return, a new
268150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    fame will be present
268250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//       savePatIdx   An index into the compiled pattern.  Goes into the original
268350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    (not new) frame.  If execution ever back-tracks out of the
268450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    new frame, this will be where we continue from in the pattern.
268550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//    Return
268650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                    The new frame pointer.
268750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//
268850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//--------------------------------------------------------------------------------
268950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoinline REStackFrame *RegexMatcher::StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status) {
2690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // push storage for a new frame.
269150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *newFP = fStack->reserveBlock(fFrameSize, status);
269250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (newFP == NULL) {
269350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Failure on attempted stack expansion.
269450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   Stack function set some other error code, change it to a more
269550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //   specific one for regular expressions.
269650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        status = U_REGEX_STACK_OVERFLOW;
269750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // We need to return a writable stack frame, so just return the
269850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    previous frame.  The match operation will stop quickly
269950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    because of the error status, after which the frame will never
270050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    be looked at again.
270150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return fp;
270250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
270350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp = (REStackFrame *)(newFP - fFrameSize);  // in case of realloc of stack.
2704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
270550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // New stack frame = copy of old top frame.
270650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *source = (int64_t *)fp;
270750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t *dest   = newFP;
270850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
270950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *dest++ = *source++;
271050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (source == newFP) {
271150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
271250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
271350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
271550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fTickCounter--;
271650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTickCounter <= 0) {
271750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       IncrementTime(status);    // Re-initializes fTickCounter
271850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
271950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fPatIdx = savePatIdx;
272050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return (REStackFrame *)newFP;
272150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
272250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
272350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
//--------------------------------------------------------------------------------
//
//   MatchAt      This is the actual matching engine.
//
//                  startIdx:    begin matching at this index.
//                  toEnd:       if true, match must extend to end of the input region
//
//--------------------------------------------------------------------------------
273250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) {
273350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UBool       isMatch  = FALSE;      // True if the we have a match.
2734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
273550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t     backSearchIndex = U_INT64_MAX; // used after greedy single-character matches for searching backwards
273650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
273750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     op;                    // Operation from the compiled pattern, split into
273850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     opType;                //    the opcode
273950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     opValue;               //    and the operand value.
2740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
2741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG
274250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (fTraceDebug)
274350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    {
274450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("MatchAt(startIdx=%ld)\n", startIdx);
274550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("Original Pattern: ");
274650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c = utext_next32From(fPattern->fPattern, 0);
274750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
274850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
274950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
275050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
2751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("%c", c);
2752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
275350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fPattern->fPattern);
275450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
275550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
275650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("Input String: ");
275750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c = utext_next32From(fInputText, 0);
275850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
275950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
276050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
276150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
276250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            printf("%c", c);
2763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
276450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fInputText);
276550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
276650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
276750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        printf("\n");
276850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
2769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
277050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
277250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return;
277350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
277450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Cache frequently referenced items from the compiled pattern
277650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
277750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
277850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
277950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
278050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UVector             *sets          = fPattern->fSets;
278150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fFrameSize = fPattern->fFrameSize;
278350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    REStackFrame        *fp            = resetStack();
278450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fPatIdx   = 0;
278650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fp->fInputIdx = startIdx;
278750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
278850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Zero out the pattern's static data
278950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t i;
279050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (i = 0; i<fPattern->fDataSize; i++) {
279150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fData[i] = 0;
279250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
279350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
279450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
279550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  Main loop for interpreting the compiled pattern.
279650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //  One iteration of the loop per pattern operation performed.
279750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    //
279850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for (;;) {
279950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        op      = (int32_t)pat[fp->fPatIdx];
280050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        opType  = URX_TYPE(op);
280150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        opValue = URX_VAL(op);
2802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG
280350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (fTraceDebug) {
280450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
280559d709d503bab6e2b61931737e662dd293b40578ccornelius            printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
280650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
280750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fPattern->dumpOp(fp->fPatIdx);
280850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
2809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
281050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fp->fPatIdx++;
2811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
281250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        switch (opType) {
281350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_NOP:
281650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
281750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
281950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKTRACK:
282050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Force a backtrack.  In some circumstances, the pattern compiler
282150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   will notice that the pattern can't possibly match anything, and will
282250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   emit one of these at that point.
282350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
282450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
282550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_ONECHAR:
282850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fActiveLimit) {
282950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
283050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
283150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == opValue) {
283250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
283350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
283450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
283550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
283650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
283750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
283850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
283950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
284050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
284250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STRING:
284350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
284450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test input against a literal string.
284550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Strings require two slots in the compiled pattern, one for the
284650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   offset to the string text, and one for the length.
284750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2848103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int32_t   stringStartIdx = opValue;
284950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
285050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
285150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                opType    = URX_TYPE(op);
2852103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int32_t stringLen = URX_VAL(op);
285350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opType == URX_STRING_LEN);
285450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stringLen >= 2);
2855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
2856103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                const UChar *patternString = litText+stringStartIdx;
2857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int32_t patternStringIndex = 0;
285850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
2859103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UChar32 inputChar;
2860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UChar32 patternChar;
286150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = TRUE;
2862103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                while (patternStringIndex < stringLen) {
2863103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) {
286450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = FALSE;
2865103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fHitEnd = TRUE;
2866103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
2867103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
2868103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    inputChar = UTEXT_NEXT32(fInputText);
2869103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    U16_NEXT(patternString, patternStringIndex, stringLen, patternChar);
2870103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (patternChar != inputChar) {
2871103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
2872103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
287350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
287450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
287650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
287750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
287850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
287950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
288050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
288150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
288250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
288350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STATE_SAVE:
288650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = StateSave(fp, opValue, status);
288750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
288850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
288950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
289050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_END:
289150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // The match loop will exit via this path on a successful match,
289250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   when we reach the end of the pattern.
289350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (toEnd && fp->fInputIdx != fActiveLimit) {
289450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The pattern matched, but not to the end of input.  Try some more.
289550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
289650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
289750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
289850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = TRUE;
289950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto  breakFromLoop;
290050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
290150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        // Start and End Capture stack frame variables are laid out like this:
290250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  fp->fExtra[opValue]  - The start of a completed capture group
290350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //             opValue+1 - The end   of a completed capture group
290450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //             opValue+2 - the start of a capture group whose end
290550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //                          has not yet been reached (and might not ever be).
290650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_START_CAPTURE:
290750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
290850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue+2] = fp->fInputIdx;
290950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
291050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
291250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_END_CAPTURE:
291350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
291450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
291550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
291650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
291750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
291850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
291950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
292050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
292150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR:                   //  $, test for End of line
292250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                           //     or for position before new line at end of input
292350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
292450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
292550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
292650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
292750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
292850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
292950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
2930fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
293150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
2932fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
293350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are positioned just before a new-line that is located at the
293450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   end of input, succeed.
293550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
293650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
29371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (isLineTerminator(c)) {
293850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // If not in the middle of a CR/LF sequence
29391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && ((void)UTEXT_PREVIOUS32(fInputText), UTEXT_PREVIOUS32(fInputText))==0x0d)) {
294050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            // At new-line at end of input. Success
294150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;
294250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fRequireEnd = TRUE;
2943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
294450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
294550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
294650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
294750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
294850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 nextC = UTEXT_NEXT32(fInputText);
294950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c == 0x0d && nextC == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) >= fAnchorLimit) {
295050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
295150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fRequireEnd = TRUE;
295250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;                         // At CR/LF at end of input.  Success
295350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
295450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
295550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
295650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
295750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
295850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
295950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
296150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
296250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fAnchorLimit) {
296350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Off the end of input.  Success.
296450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
296550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fRequireEnd = TRUE;
296650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
296750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
296850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
296950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
297050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Either at the last character of input, or off the end.
297150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a && UTEXT_GETNATIVEINDEX(fInputText) == fAnchorLimit) {
297250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
297350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
297450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
297550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
297650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
297750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
297850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Not at end of input.  Back-track out.
297950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
298050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
298150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
298250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
298350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
298450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             {
298550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (fp->fInputIdx >= fAnchorLimit) {
298650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // We really are at the end of input.  Success.
298750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fHitEnd = TRUE;
298850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fRequireEnd = TRUE;
298950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     break;
299050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
299150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // If we are positioned just before a new-line, succeed.
299250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // It makes no difference where the new-line is within the input.
299350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
299450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UChar32 c = UTEXT_CURRENT32(fInputText);
29951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                 if (isLineTerminator(c)) {
299650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
299750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     //  In multi-line mode, hitting a new-line just before the end of input does not
299850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     //   set the hitEnd or requireEnd flags
299950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && UTEXT_PREVIOUS32(fInputText)==0x0d)) {
300050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
300150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     }
300250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
300350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // not at a new line.  Fail.
300450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 fp = (REStackFrame *)fStack->popFrame(fFrameSize);
300550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             }
300650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             break;
300750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
300950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
301050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             {
301150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (fp->fInputIdx >= fAnchorLimit) {
301250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     // We really are at the end of input.  Success.
301350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fHitEnd = TRUE;
301450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
301550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     break;               //   adding a new-line would not lose the match.
301650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
301750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // If we are not positioned just before a new-line, the test fails; backtrack out.
301850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 // It makes no difference where the new-line is within the input.
301950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
302050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 if (UTEXT_CURRENT32(fInputText) != 0x0a) {
302150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);
302250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 }
302350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             }
302450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             break;
302550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
302650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
302750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET:                    //  ^, test for start of line
302850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx != fAnchorStart) {
302950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
303050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
303150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
303250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
303450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET_M:                   //  ^, test for start of line in multi-line mode
303550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           {
303650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (fp->fInputIdx == fAnchorStart) {
303750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // We are at the start of input.  Success.
303850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
303950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
304050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Check whether character just before the current pos is a new-line
304150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               //   unless we are at the end of input
304250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3043fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius               UChar32  c = UTEXT_PREVIOUS32(fInputText);
30441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert               if ((fp->fInputIdx < fAnchorLimit) && isLineTerminator(c)) {
304550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   //  It's a new-line.  ^ is true.  Success.
304650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   //  TODO:  what should be done with positions between a CR and LF?
304750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
304850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
304950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Not at the start of a line.  Fail.
305050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               fp = (REStackFrame *)fStack->popFrame(fFrameSize);
305150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
305250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           break;
305350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
305550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho       case URX_CARET_M_UNIX:       //  ^, test for start of line in multi-line + Unix-line mode
305650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           {
305750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               U_ASSERT(fp->fInputIdx >= fAnchorStart);
305850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (fp->fInputIdx <= fAnchorStart) {
305950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // We are at the start of input.  Success.
306050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   break;
306150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
306250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               // Check whether character just before the current pos is a new-line
306350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               U_ASSERT(fp->fInputIdx <= fAnchorLimit);
306450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
306550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               UChar32  c = UTEXT_PREVIOUS32(fInputText);
306650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               if (c != 0x0a) {
306750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   // Not at the start of a line.  Back-track out.
306850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   fp = (REStackFrame *)fStack->popFrame(fFrameSize);
306950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho               }
307050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           }
307150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           break;
307250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
307350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_B:          // Test for word boundaries
307450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
307550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isWordBoundary(fp->fInputIdx);
307654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);     // flip sense for \B
307750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
307850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
307950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
308050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
308150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
308250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
308350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
308450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
308550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
308650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isUWordBoundary(fp->fInputIdx);
308754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);     // flip sense for \B
308850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
308950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
309050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
309150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
309250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
309350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
309550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_D:            // Test for decimal digit
309650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
309750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
309850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
309950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
310050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
310150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
310250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
310350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
310450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
310550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
310650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
310750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
310854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);        // flip sense for \D
310950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
311050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
311150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
311250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
311350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
311450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
311550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
311650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
311850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_G:          // Test for position at end of previous match
311927f654740f2a26ad62a5c155af9199af9e69b889claireho            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
312050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
312150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
312250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
312350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
312450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
31251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_H:            // Test for \h, horizontal white space.
31261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
31271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
31281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
31291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
31311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
31331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c = UTEXT_NEXT32(fInputText);
31341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                int8_t ctype = u_charType(c);
31351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UBool success = (ctype == U_SPACE_SEPARATOR || c == 9);  // SPACE_SEPARATOR || TAB
31361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                success ^= (UBool)(opValue != 0);        // flip sense for \H
31371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (success) {
31381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                } else {
31401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
31431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
31441b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
31451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
31461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_R:            // Test for \R, any line break sequence.
31471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
31481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
31491b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
31501b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31511b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
31521b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
31541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c = UTEXT_NEXT32(fInputText);
31551b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
31561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (c == 0x0d && utext_current32(fInputText) == 0x0a) {
31571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        utext_next32(fInputText);
31581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
31591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                } else {
31611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
31641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
31651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
31661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
31671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_V:            // \v, any single line ending character.
31681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
31691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
31701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
31711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
31731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
31751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c = UTEXT_NEXT32(fInputText);
31761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UBool success = isLineTerminator(c);
31771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                success ^= (UBool)(opValue != 0);        // flip sense for \V
31781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (success) {
31791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
31801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                } else {
31811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
31821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
31831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
31841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
31851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
31861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
3187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case URX_BACKSLASH_X:
318850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Match a Grapheme, as defined by Unicode TR 29.
318950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //  Differs slightly from Perl, which consumes combining marks independently
319050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //    of context.
319150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
319250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
319350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fail if at end of input
319450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
319550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
319650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
319750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
319850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
3199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
320050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
320150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
320250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Examine (and consume) the current char.
320350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Dispatch into a little state machine, based on the char.
320450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
320550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
320650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
320750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeSet **sets = fPattern->fStaticSets;
320850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
320950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
321050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_L]->contains(c))       goto GC_L;
321150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
321250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
321350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
321450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
321550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
321650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
321950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_L:
322050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
322150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
322250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
322350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_L]->contains(c))       goto GC_L;
322450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LV]->contains(c))      goto GC_V;
322550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
322650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
3227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
322850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
322950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
323050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
323150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_V:
323250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
323350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
323450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
323550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_V]->contains(c))       goto GC_V;
323650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
3237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
323850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
323950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
324050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
324150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_T:
324250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
324350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
324450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
324550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_T]->contains(c))       goto GC_T;
3246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                (void)UTEXT_PREVIOUS32(fInputText);
324750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
324850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Extend;
324950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
325050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Extend:
325150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Combining characters are consumed here
325250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
325350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (fp->fInputIdx >= fActiveLimit) {
325450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
325550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
325650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_CURRENT32(fInputText);
325750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
325850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
325950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    (void)UTEXT_NEXT32(fInputText);
326150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
326250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
326350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                goto GC_Done;
326450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
326550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Control:
3266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Most control chars stand alone (don't combine with combining chars),
326750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   except that a CR/LF sequence is a single grapheme cluster.
326850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0d && fp->fInputIdx < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) {
326950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    c = UTEXT_NEXT32(fInputText);
327050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
327150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
327250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
327350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoGC_Done:
327450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
327550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
327650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
327750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
327850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
3279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
328050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
328150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
328250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
328350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKSLASH_Z:          // Test for end of Input
328450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fAnchorLimit) {
328550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
328650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
328750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
328850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fRequireEnd = TRUE;
328950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
329050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
329150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
329450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STATIC_SETREF:
329550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
329650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Test input character against one of the predefined sets
329750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    (Word Characters, for example)
329850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // The high bit of the op value is a flag for the match polarity.
329950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    0:   success if input char is in set.
330050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    1:   success if input char is not in set.
330150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
330250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
330350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
330450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
330550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
330650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);
330850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                opValue &= ~URX_NEG_SET;
330950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
331050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
331150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
331250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
331350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c < 256) {
331450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
331550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
331650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = !success;
331750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
331850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
331950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
332050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
332150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        success = !success;
332250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
332350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
332450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
332550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
332650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
332750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // the character wasn't in the set.
332850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
332950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
333050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
333150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
3332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
333350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
333450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STAT_SETREF_N:
333550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
3336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Test input character for NOT being a member of  one of
333750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    the predefined sets (Word Characters, for example)
333850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
333950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
334050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
334150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
334250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
334350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
334450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
334550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
334650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
334850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
334950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c < 256) {
335050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
335150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c) == FALSE) {
335250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
335350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
335450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
335550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
335650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
335750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c) == FALSE) {
335850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
335950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
336050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
336150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
336250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character wasn't in the set.
336350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
336450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
336550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
3366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
336750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
336850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_SETREF:
336950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
337050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
337150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
337250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
337350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
337450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
337650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Pick up one char and test it for set membership.
337750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
337850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
337950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c<256) {
338050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
338150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
338250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
338350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
338450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
338550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
338650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
338750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
338850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
338950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
339050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
339150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
339250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
3393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
339450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character wasn't in the set.
339550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
339650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
339750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
339850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
339950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
340050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY:
340150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
340250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // . matches anything, but stops at end-of-line.
340350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
340450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
340550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
340650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
340750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
340850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
3409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
341050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
341250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, unless we've hit end-of-line
341350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
34141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
341550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // End of line in normal mode.   . does not match.
341650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
341750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
341850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
341950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
342050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
342150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
342250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
342350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
342450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY_ALL:
342550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
342650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // ., in dot-matches-all (including new lines) mode
342750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
342850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
342950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
343050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
343150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
343250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
3433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
343450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
343650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, except if we are
343750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   at a cr/lf, advance over both of them.
3438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar32 c;
343950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = UTEXT_NEXT32(fInputText);
344050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
344150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
344250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // In the case of a CR/LF, we need to advance over both.
344350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 nextc = UTEXT_CURRENT32(fInputText);
344450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (nextc == 0x0a) {
3445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_NEXT32(fInputText);
344650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
344750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
344850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
344950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
345050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
345150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
345250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
345350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOTANY_UNIX:
345450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
345550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // '.' operator, matches all, but stops at end-of-line.
345650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
345750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
345850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At end of input.  Match failed.  Backtrack out.
345950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
346050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
346150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
346250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
346350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
346450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
346650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Advance over one char, unless we've hit end-of-line
346750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
346850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c == 0x0a) {
346950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // End of line in normal mode.   '.' does not match the \n
347050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
347150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
347250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
347350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
347450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
347550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
347650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
347750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
347850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP:
347950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fPatIdx = opValue;
348050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
348150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_FAIL:
348350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = FALSE;
348450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto breakFromLoop;
348550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
348650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP_SAV:
348750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue < fPattern->fCompiledPat->size());
348850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
348950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp->fPatIdx = opValue;                         // Then JMP.
349050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
349150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
349250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMP_SAV_X:
349350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // This opcode is used with (x)+, when x can match a zero length string.
349450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Same as JMP_SAV, except conditional on the match having made forward progress.
349550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
349650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   data address of the input position at the start of the loop.
349750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
349850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
349950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  stoOp = (int32_t)pat[opValue-1];
350050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
350150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  frameLoc = URX_VAL(stoOp);
350250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
350350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t prevInputIdx = fp->fExtra[frameLoc];
350450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(prevInputIdx <= fp->fInputIdx);
350550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (prevInputIdx < fp->fInputIdx) {
350650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The match did make progress.  Repeat the loop.
350750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
350850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue;
350950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fExtra[frameLoc] = fp->fInputIdx;
3510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
351150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If the input position did not advance, we do nothing here,
351250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   execution will fall out of the loop.
351350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
351450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
351550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
351650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_INIT:
351750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
351850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
351959d709d503bab6e2b61931737e662dd293b40578ccornelius                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
352050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
352150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Pick up the three extra operands that CTR_INIT has, and
3522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //    skip the pattern location counter past them.
352350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
352450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 3;
352550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
352650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
352750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
352850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minCount>=0);
352950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(maxCount>=minCount || maxCount==-1);
353059d709d503bab6e2b61931737e662dd293b40578ccornelius                U_ASSERT(loopLoc>=fp->fPatIdx);
353150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
353250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (minCount == 0) {
353350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, loopLoc+1, status);
353450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
353559d709d503bab6e2b61931737e662dd293b40578ccornelius                if (maxCount == -1) {
353659d709d503bab6e2b61931737e662dd293b40578ccornelius                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  For loop breaking.
353759d709d503bab6e2b61931737e662dd293b40578ccornelius                } else if (maxCount == 0) {
353850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
353950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
354050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
354150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
354250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
354350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_LOOP:
354450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
354550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
354650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
354750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
354850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
354950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
355050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
355150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                (*pCounter)++;
355259d709d503bab6e2b61931737e662dd293b40578ccornelius                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
355359d709d503bab6e2b61931737e662dd293b40578ccornelius                    U_ASSERT(*pCounter == maxCount);
355450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
355550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
355650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*pCounter >= minCount) {
355759d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (maxCount == -1) {
355859d709d503bab6e2b61931737e662dd293b40578ccornelius                        // Loop has no hard upper bound.
355959d709d503bab6e2b61931737e662dd293b40578ccornelius                        // Check that it is progressing through the input, break if it is not.
356059d709d503bab6e2b61931737e662dd293b40578ccornelius                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
356159d709d503bab6e2b61931737e662dd293b40578ccornelius                        if (fp->fInputIdx == *pLastInputIdx) {
356259d709d503bab6e2b61931737e662dd293b40578ccornelius                            break;
356359d709d503bab6e2b61931737e662dd293b40578ccornelius                        } else {
356459d709d503bab6e2b61931737e662dd293b40578ccornelius                            *pLastInputIdx = fp->fInputIdx;
356559d709d503bab6e2b61931737e662dd293b40578ccornelius                        }
356659d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
356750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, fp->fPatIdx, status);
356850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
356950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx = opValue + 4;    // Loop back.
357050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
357150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
357250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
357350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_INIT_NG:
357450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
357550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Initialize a non-greedy loop
357650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
357759d709d503bab6e2b61931737e662dd293b40578ccornelius                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
357850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
357959d709d503bab6e2b61931737e662dd293b40578ccornelius                // Pick up the three extra operands that CTR_INIT_NG has, and
3580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //    skip the pattern location counter past them.
358150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
358250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 3;
358350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
358450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
358550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
358650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minCount>=0);
358750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(maxCount>=minCount || maxCount==-1);
358850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(loopLoc>fp->fPatIdx);
358959d709d503bab6e2b61931737e662dd293b40578ccornelius                if (maxCount == -1) {
359059d709d503bab6e2b61931737e662dd293b40578ccornelius                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  Save initial input index for loop breaking.
359159d709d503bab6e2b61931737e662dd293b40578ccornelius                }
359250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
359350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (minCount == 0) {
359450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (maxCount != 0) {
359550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = StateSave(fp, fp->fPatIdx, status);
359650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
359750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
3598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
359950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
360050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
360150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
360250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CTR_LOOP_NG:
360350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
360450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Non-greedy {min, max} loops
360550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
360650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
360750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
360850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
360950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
361050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
361150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
361259d709d503bab6e2b61931737e662dd293b40578ccornelius                (*pCounter)++;
361359d709d503bab6e2b61931737e662dd293b40578ccornelius                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
361450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // The loop has matched the maximum permitted number of times.
361550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Break out of here with no action.  Matching will
361650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   continue with the following pattern.
361759d709d503bab6e2b61931737e662dd293b40578ccornelius                    U_ASSERT(*pCounter == maxCount);
361850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
361950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
362050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
362150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*pCounter < minCount) {
362250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We haven't met the minimum number of matches yet.
362350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Loop back for another one.
362450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue + 4;    // Loop back.
362550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
362650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We do have the minimum number of matches.
362759d709d503bab6e2b61931737e662dd293b40578ccornelius
362859d709d503bab6e2b61931737e662dd293b40578ccornelius                    // If there is no upper bound on the loop iterations, check that the input index
362959d709d503bab6e2b61931737e662dd293b40578ccornelius                    // is progressing, and stop the loop if it is not.
363059d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (maxCount == -1) {
363159d709d503bab6e2b61931737e662dd293b40578ccornelius                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
363259d709d503bab6e2b61931737e662dd293b40578ccornelius                        if (fp->fInputIdx == *pLastInputIdx) {
363359d709d503bab6e2b61931737e662dd293b40578ccornelius                            break;
363459d709d503bab6e2b61931737e662dd293b40578ccornelius                        }
363559d709d503bab6e2b61931737e662dd293b40578ccornelius                        *pLastInputIdx = fp->fInputIdx;
363659d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
363759d709d503bab6e2b61931737e662dd293b40578ccornelius
363859d709d503bab6e2b61931737e662dd293b40578ccornelius                    // Loop Continuation: we will fall into the pattern following the loop
363959d709d503bab6e2b61931737e662dd293b40578ccornelius                    //   (non-greedy, don't execute loop body first), but first do
364059d709d503bab6e2b61931737e662dd293b40578ccornelius                    //   a state save to the top of the loop, so that a match failure
364150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   in the following pattern will try another iteration of the loop.
364250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = StateSave(fp, opValue + 4, status);
364350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
364450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
364550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
364650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
364750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STO_SP:
364850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
364950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fData[opValue] = fStack->size();
365050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
365150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
365250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LD_SP:
365350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
365450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
365550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
365650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(newStackSize <= fStack->size());
365750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
365850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (newFP == (int64_t *)fp) {
365950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
366050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
366150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t i;
366250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (i=0; i<fFrameSize; i++) {
366350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    newFP[i] = ((int64_t *)fp)[i];
366450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
366550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)newFP;
366650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fStack->setSize(newStackSize);
366750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
366850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
366950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
367050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_BACKREF:
367150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
367250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue < fFrameSize);
367350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupStartIdx = fp->fExtra[opValue];
367450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupEndIdx   = fp->fExtra[opValue+1];
367550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(groupStartIdx <= groupEndIdx);
367650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (groupStartIdx < 0) {
367750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // This capture group has not participated in the match thus far,
367850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
367950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
368050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
368150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fAltInputText, groupStartIdx);
368250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3683103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3684103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //   Note: if the capture group match was of an empty string the backref
3685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //         match succeeds.  Verified by testing:  Perl matches succeed
3686103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //         in this case, so we do too.
3687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
3688103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool success = TRUE;
3689103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                for (;;) {
3690103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (utext_getNativeIndex(fAltInputText) >= groupEndIdx) {
3691103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = TRUE;
3692103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
3693103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3694103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (utext_getNativeIndex(fInputText) >= fActiveLimit) {
3695103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
369650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
3697103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
3698103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3699103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 captureGroupChar = utext_next32(fAltInputText);
3700103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 inputChar = utext_next32(fInputText);
3701103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputChar != captureGroupChar) {
3702103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
3703103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
370450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3705103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
3706103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3707103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success) {
3708103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
3709103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                } else {
3710103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
3711103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
3712103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
3713103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
3714103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3715103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3716103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3717103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case URX_BACKREF_I:
3718103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            {
3719103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(opValue < fFrameSize);
3720103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int64_t groupStartIdx = fp->fExtra[opValue];
3721103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int64_t groupEndIdx   = fp->fExtra[opValue+1];
3722103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(groupStartIdx <= groupEndIdx);
3723103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (groupStartIdx < 0) {
3724103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    // This capture group has not participated in the match thus far,
372550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
3726103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    break;
372750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
3728103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                utext_setNativeIndex(fAltInputText, groupStartIdx);
3729103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                utext_setNativeIndex(fInputText, fp->fInputIdx);
3730103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                CaseFoldingUTextIterator captureGroupItr(*fAltInputText);
3731103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                CaseFoldingUTextIterator inputItr(*fInputText);
3732103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3733103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //   Note: if the capture group match was of an empty string the backref
3734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //         match succeeds.  Verified by testing:  Perl matches succeed
3735103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //         in this case, so we do too.
3736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
3737103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool success = TRUE;
3738103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                for (;;) {
3739103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (!captureGroupItr.inExpansion() && utext_getNativeIndex(fAltInputText) >= groupEndIdx) {
3740103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = TRUE;
3741103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
3742103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3743103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (!inputItr.inExpansion() && utext_getNativeIndex(fInputText) >= fActiveLimit) {
3744103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
3745103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fHitEnd = TRUE;
3746103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
3747103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3748103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 captureGroupChar = captureGroupItr.next();
3749103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 inputChar = inputItr.next();
3750103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputChar != captureGroupChar) {
3751103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
3752103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
3753103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3754103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
3755103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3756103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success && inputItr.inExpansion()) {
3757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // We obtained a match by consuming part of a string obtained from
3758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // case-folding a single code point of the input text.
3759103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    // This does not count as an overall match.
3760103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    success = FALSE;
3761103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
3762103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3763103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success) {
3764103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
3765103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                } else {
3766103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
3767103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
3768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
376950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
377050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
3771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
377250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STO_INP_LOC:
377350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
377450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
377550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[opValue] = fp->fInputIdx;
377650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
377750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
377850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
377950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_JMPX:
378050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
378150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
378250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx += 1;
378350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
378450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
378550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t savedInputIdx = fp->fExtra[dataLoc];
378650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(savedInputIdx <= fp->fInputIdx);
378750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (savedInputIdx < fp->fInputIdx) {
378850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = opValue;                               // JMP
378950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
379050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
379150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
379250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
379350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
379450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
379550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LA_START:
379650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
379750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Entering a lookahead block.
379850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save Stack Ptr, Input Pos.
379950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
380050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue]   = fStack->size();
380150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+1] = fp->fInputIdx;
380250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveStart     = fLookStart;          // Set the match region change for
380350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit     = fLookLimit;          //   transparent bounds.
380450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
380550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
380650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
380750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LA_END:
380850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
380950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Leaving a look-ahead block.
381050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
381150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
381250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackSize = fStack->size();
381350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize =(int32_t)fData[opValue];
381450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackSize >= newStackSize);
381550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (stackSize > newStackSize) {
381650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Copy the current top frame back to the new (cut back) top frame.
381750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   This makes the capture groups from within the look-ahead
381850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   expression available.
381950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
382050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t i;
382150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    for (i=0; i<fFrameSize; i++) {
382250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newFP[i] = ((int64_t *)fp)[i];
382350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
382450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)newFP;
382550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fStack->setSize(newStackSize);
382650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
382750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = fData[opValue+1];
382850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
382950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Restore the active region bounds in the input string; they may have
383050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    been changed because of transparent bounds on a Region.
383150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveStart = fRegionStart;
383250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = fRegionLimit;
383350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
383450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
383550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
383650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_ONECHAR_I:
3837103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Case insensitive one char.  The char from the pattern is already case folded.
3838103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Input text is not, but case folding the input can not reduce two or more code
3839103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // points to one.
384050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx < fActiveLimit) {
384150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
384250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
384350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = UTEXT_NEXT32(fInputText);
384450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
384550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
384650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
384750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
384850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
384950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
385050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
3851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
385250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
385350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
385450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
385550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_STRING_I:
385650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
3857103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                // Case-insensitive test input against a literal string.
385850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Strings require two slots in the compiled pattern, one for the
385950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   offset to the string text, and one for the length.
3860103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //   The compiled string has already been case folded.
386127f654740f2a26ad62a5c155af9199af9e69b889claireho                {
3862103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    const UChar *patternString = litText + opValue;
3863103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    int32_t      patternStringIdx  = 0;
386450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
386550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    op      = (int32_t)pat[fp->fPatIdx];
386650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;
386750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opType  = URX_TYPE(op);
386850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    opValue = URX_VAL(op);
386950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(opType == URX_STRING_LEN);
3870103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    int32_t patternStringLen = opValue;  // Length of the string from the pattern.
3871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
3872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
3873103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32   cPattern;
3874103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32   cText;
3875103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UBool     success = TRUE;
3876103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
387750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
3878103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    CaseFoldingUTextIterator inputIterator(*fInputText);
3879103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    while (patternStringIdx < patternStringLen) {
3880103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        if (!inputIterator.inExpansion() && UTEXT_GETNATIVEINDEX(fInputText) >= fActiveLimit) {
3881103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            success = FALSE;
3882103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            fHitEnd = TRUE;
3883103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            break;
388450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
3885103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern);
3886103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        cText = inputIterator.next();
3887103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        if (cText != cPattern) {
3888103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            success = FALSE;
3889103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            break;
389050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
389150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
3892103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputIterator.inExpansion()) {
3893103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
3894103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
3895103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
3896103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (success) {
3897103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
3898103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    } else {
389950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp = (REStackFrame *)fStack->popFrame(fFrameSize);
390050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
390150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
390250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
390350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
390450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
390550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_START:
390650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
390750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Entering a look-behind block.
390850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save Stack Ptr, Input Pos.
390950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   TODO:  implement transparent bounds.  Ticket #6067
391050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
391150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue]   = fStack->size();
391250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+1] = fp->fInputIdx;
391350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Init the variable containing the start index for attempted matches.
391450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+2] = -1;
391550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save input string length, then reset to pin any matches to end at
391650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   the current position.
391750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fData[opValue+3] = fActiveLimit;
391850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit     = fp->fInputIdx;
391950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
392050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
392150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
392250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
392350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_CONT:
392450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
392550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Positive Look-Behind, at top of loop checking for matches of LB expression
392650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    at all possible input starting positions.
392750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
392850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch the min and max possible match lengths.  They are the operands
392950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   of this op in the pattern.
393050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML = (int32_t)pat[fp->fPatIdx++];
393150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
393250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML <= maxML);
393350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML >= 0);
393450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
393550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch (from data) the last input index where a match was attempted.
393650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
393750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
393850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0) {
393950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // First time through loop.
394050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *lbStartIdx = fp->fInputIdx - minML;
394150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
394250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // 2nd through nth time through the loop.
394350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Back up start position for match by one.
394450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (*lbStartIdx == 0) {
394550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
394650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
394750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
3948b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(fInputText);
394950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
395050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
395150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
395250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
395350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
395450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We have tried all potential match starting points without
395550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  getting a match.  Backtrack out, and out of the
395650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   Look Behind altogether.
395750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
395850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
395950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen >= fActiveLimit);
396050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
396150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fActiveLimit = restoreInputLen;
396250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
396350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
396450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
396550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
396650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //      (successful match will fall off the end of the loop.)
396750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-3, status);
396850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = *lbStartIdx;
396950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
397050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
397150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
397250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LB_END:
397350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // End of a look-behind block, after a successful match.
397450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
397550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
397650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx != fActiveLimit) {
397750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  The look-behind expression matched, but the match did not
397850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //    extend all the way to the point that we are looking behind from.
397950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  FAIL out of here, which will take us back to the LB_CONT, which
398050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     will retry the match starting at another position or fail
398150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     the look-behind altogether, whichever is appropriate.
398250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
398350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
398450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
398550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
398650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Look-behind match is good.  Restore the original input string length,
3987fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   which had been truncated to pin the end of the lookbehind match to the
398850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   position being looked-behind.
398950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
399050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen >= fActiveLimit);
399150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
399250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = originalInputLen;
399350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
399450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
399550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
399750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LBN_CONT:
399850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
399950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Negative Look-Behind, at top of loop checking for matches of LB expression
400050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    at all possible input starting positions.
400150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
400250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch the extra parameters of this op.
400350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
400450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
400550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
400650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continueLoc = URX_VAL(continueLoc);
400750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML <= maxML);
400850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(minML >= 0);
400950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(continueLoc > fp->fPatIdx);
401050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
401150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Fetch (from data) the last input index where a match was attempted.
401250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
401350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
401450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0) {
401550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // First time through loop.
401650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *lbStartIdx = fp->fInputIdx - minML;
401750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
401850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // 2nd through nth time through the loop.
401950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Back up start position for match by one.
402050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (*lbStartIdx == 0) {
402150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
402250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
402350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UTEXT_SETNATIVEINDEX(fInputText, *lbStartIdx);
4024b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        (void)UTEXT_PREVIOUS32(fInputText);
402550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *lbStartIdx = UTEXT_GETNATIVEINDEX(fInputText);
402650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
402750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
402850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
402950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
403050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We have tried all potential match starting points without
403150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  getting a match, which means that the negative lookbehind as
403250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  a whole has succeeded.  Jump forward to the continue location
403350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
403450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen >= fActiveLimit);
403550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
403650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fActiveLimit = restoreInputLen;
403750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx = continueLoc;
403850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
403950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
404050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
404150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
404250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //      (successful match will cause a FAIL out of the loop altogether.)
404350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-4, status);
404450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = *lbStartIdx;
404550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
404650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
404750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
404850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LBN_END:
404950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // End of a negative look-behind block, after a successful match.
405050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
405150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
405250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx != fActiveLimit) {
405350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  The look-behind expression matched, but the match did not
405450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //    extend all the way to the point that we are looking behind from.
405550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  FAIL out of here, which will take us back to the LB_CONT, which
405650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     will retry the match starting at another position or succeed
405750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //     the look-behind altogether, whichever is appropriate.
405850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
405950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
406050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
406150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
406250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Look-behind expression matched, which means look-behind test as
406350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   a whole Fails
4064fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4065fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   Restore the original input string length, which had been truncated
4066fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   in order to pin the end of the lookbehind match
406750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   to the position being looked-behind.
406850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
406950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen >= fActiveLimit);
407050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
407150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fActiveLimit = originalInputLen;
407250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
407350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Restore original stack position, discarding any state saved
407450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   by the successful pattern match.
407550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
407650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
407750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fStack->size() > newStackSize);
407850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fStack->setSize(newStackSize);
4079fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4080fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //  FAIL, which will take control back to someplace
408150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //  prior to entering the look-behind test.
408250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
408350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
408450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
408550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
408750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_SR_I:
408850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Loop Initialization for the optimized implementation of
408950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     [some character set]*
409050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   This op scans through all matching input.
409150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
409250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
409350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
409450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
409550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
409650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
409750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Loop through input, until either the input is exhausted or
409850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   we reach a character that is not a member of the set.
409950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t ix = fp->fInputIdx;
410050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, ix);
410150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                for (;;) {
410250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (ix >= fActiveLimit) {
410350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fHitEnd = TRUE;
410450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
410550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
410650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UChar32 c = UTEXT_NEXT32(fInputText);
410750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (c<256) {
410850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (s8->contains(c) == FALSE) {
410950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
411050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
411150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
411250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (s->contains(c) == FALSE) {
411350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
411450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
411550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
411650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = UTEXT_GETNATIVEINDEX(fInputText);
411750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
411850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
411950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If there were no matching characters, skip over the loop altogether.
412050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   The loop doesn't run at all, a * op always succeeds.
412150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (ix == fp->fInputIdx) {
412250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
412350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
412450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
412550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
412650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
412750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   must follow.  Its operand is the stack location
412850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   that holds the starting input index for the match of this [set]*
412950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
413050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
413150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackLoc = URX_VAL(loopcOp);
413250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
413350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[stackLoc] = fp->fInputIdx;
413450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = ix;
413550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
413650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save State to the URX_LOOP_C op that follows this one,
413750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   so that match failures in the following code will return to there.
413850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
413950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx, status);
414050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
414150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
414250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
414350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
414450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
414550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_DOT_I:
414650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Loop Initialization for the optimized implementation of .*
414750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   This op scans through all remaining input.
414850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
414950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
415050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Loop through input until the input is exhausted (we reach an end-of-line)
415150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // In DOTALL mode, we can just go straight to the end of the input.
415250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t ix;
415350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if ((opValue & 1) == 1) {
415450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Dot-matches-All mode.  Jump straight to the end of the string.
415550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = fActiveLimit;
415650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
415750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
415850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // NOT DOT ALL mode.  Line endings do not match '.'
415950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Scan forward until a line ending or end of input.
416050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = fp->fInputIdx;
416150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UTEXT_SETNATIVEINDEX(fInputText, ix);
416250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    for (;;) {
416350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        if (ix >= fActiveLimit) {
416450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            fHitEnd = TRUE;
416550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            break;
416650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
416750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        UChar32 c = UTEXT_NEXT32(fInputText);
416827f654740f2a26ad62a5c155af9199af9e69b889claireho                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
416927f654740f2a26ad62a5c155af9199af9e69b889claireho                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
417027f654740f2a26ad62a5c155af9199af9e69b889claireho                               (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
41711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                    isLineTerminator(c))) {
417250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //  char is a line ending.  Exit the scanning loop.
417350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                break;
417450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                            }
417550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
417650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ix = UTEXT_GETNATIVEINDEX(fInputText);
417750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
417850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
417950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
418050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If there were no matching characters, skip over the loop altogether.
418150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   The loop doesn't run at all, a * op always succeeds.
418250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (ix == fp->fInputIdx) {
418350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
418450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
418550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
418650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
418750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
418850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   must follow.  Its operand is the stack location
418950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   that holds the starting input index for the match of this .*
419050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
419150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
419250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t stackLoc = URX_VAL(loopcOp);
419350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
419450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fExtra[stackLoc] = fp->fInputIdx;
419550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = ix;
419650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
419750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Save State to the URX_LOOP_C op that follows this one,
419850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   so that match failures in the following code will return to there.
419950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
420050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx, status);
420150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fPatIdx++;
420250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
420350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
420450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
420550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
420650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_LOOP_C:
420750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
420850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue>=0 && opValue<fFrameSize);
420950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = fp->fExtra[opValue];
421050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(backSearchIndex <= fp->fInputIdx);
421150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (backSearchIndex == fp->fInputIdx) {
421250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We've backed up the input idx to the point that the loop started.
4213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // The loop is done.  Leave here without saving state.
421450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  Subsequent failures won't come back here.
421550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
421650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
421750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Set up for the next iteration of the loop, with input index
421850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   backed up by one from the last time through,
421950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   and a state save to this instruction in case the following code fails again.
422050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   (We're going backwards because this loop emulates stack unwinding, not
422150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //    the initial scan forward.)
422250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx > 0);
422350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
422450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 prevC = UTEXT_PREVIOUS32(fInputText);
422550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
4226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
422750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 twoPrevC = UTEXT_PREVIOUS32(fInputText);
4228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if (prevC == 0x0a &&
422950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx > backSearchIndex &&
423050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    twoPrevC == 0x0d) {
423150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
423250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
423350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // .*, stepping back over CRLF pair.
423450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText);
423550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
423650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
423950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = StateSave(fp, fp->fPatIdx-1, status);
424050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
424150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
424550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        default:
424650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Trouble.  The compiled pattern contains an entry with an
424750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //           unrecognized type tag.
424850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U_ASSERT(FALSE);
4249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
425050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
425150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if (U_FAILURE(status)) {
425250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            isMatch = FALSE;
4253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
425750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehobreakFromLoop:
425850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fMatch = isMatch;
425950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (isMatch) {
426050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fLastMatchEnd = fMatchEnd;
426150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatchStart   = startIdx;
426250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fMatchEnd     = fp->fInputIdx;
4263c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG
4266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (fTraceDebug) {
4267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (isMatch) {
4268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
4269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
4270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("No match\n\n");
4271c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
4272c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
4273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
4274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
427550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fFrame = fp;                // The active stack frame when the engine stopped.
427650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //   Contains the capture group results that we need to
427750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                //    access later.
427850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return;
4279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
4280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
4282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
4283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
428450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//   MatchChunkAt   This is the actual matching engine. Like MatchAt, but with the
428550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  assumption that the entire string is available in the UText's
428650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  chunk buffer. For now, that means we can use int32_t indexes,
428750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  except for anything that needs to be saved (like group starts
428850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho//                  and ends).
4289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
4290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  startIdx:    begin matching at this index.
4291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//                  toEnd:       if true, match must extend to end of the input region
4292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru//
4293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------------------
429450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status) {
4295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool       isMatch  = FALSE;      // True if the we have a match.
4296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
429750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t     backSearchIndex = INT32_MAX; // used after greedy single-character matches for searching backwards
4298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     op;                    // Operation from the compiled pattern, split into
4300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     opType;                //    the opcode
4301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t     opValue;               //    and the operand value.
4302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
430350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG
4304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (fTraceDebug) {
430559d709d503bab6e2b61931737e662dd293b40578ccornelius        printf("MatchAt(startIdx=%d)\n", startIdx);
4306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Original Pattern: ");
430750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c = utext_next32From(fPattern->fPattern, 0);
430850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
430950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c<32 || c>256) {
431050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                c = '.';
431150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
4312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("%c", c);
4313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
431450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fPattern->fPattern);
4315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Input String: ");
431850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        c = utext_next32From(fInputText, 0);
431950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while (c != U_SENTINEL) {
4320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c<32 || c>256) {
4321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c = '.';
4322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%c", c);
4324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
432550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            c = UTEXT_NEXT32(fInputText);
4326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
4327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\n");
4329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
433050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
4331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
4333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
4334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Cache frequently referenced items from the compiled pattern
4337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
433850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int64_t             *pat           = fPattern->fCompiledPat->getBuffer();
4339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar         *litText       = fPattern->fLiteralText.getBuffer();
4341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector             *sets          = fPattern->fSets;
4342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
434350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar         *inputBuf      = fInputText->chunkContents;
4344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFrameSize = fPattern->fFrameSize;
4346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    REStackFrame        *fp            = resetStack();
4347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fp->fPatIdx   = 0;
4349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fp->fInputIdx = startIdx;
4350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Zero out the pattern's static data
4352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
4353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i = 0; i<fPattern->fDataSize; i++) {
4354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fData[i] = 0;
4355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
4356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Main loop for interpreting the compiled pattern.
4359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  One iteration of the loop per pattern operation performed.
4360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
4361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
436250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        op      = (int32_t)pat[fp->fPatIdx];
4363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        opType  = URX_TYPE(op);
4364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        opValue = URX_VAL(op);
436550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifdef REGEX_RUN_DEBUG
4366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (fTraceDebug) {
436750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UTEXT_SETNATIVEINDEX(fInputText, fp->fInputIdx);
436859d709d503bab6e2b61931737e662dd293b40578ccornelius            printf("inputIdx=%ld   inputChar=%x   sp=%3ld   activeLimit=%ld  ", fp->fInputIdx,
436950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   UTEXT_CURRENT32(fInputText), (int64_t *)fp-fStack->getBuffer(), fActiveLimit);
4370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fPattern->dumpOp(fp->fPatIdx);
4371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
437250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
4373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fp->fPatIdx++;
4374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (opType) {
4376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_NOP:
4379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKTRACK:
4383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Force a backtrack.  In some circumstances, the pattern compiler
4384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   will notice that the pattern can't possibly match anything, and will
4385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   emit one of these at that point.
4386c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_ONECHAR:
4391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fActiveLimit) {
439250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
4393c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c == opValue) {
4395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
4398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
4399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
440050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
440150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STRING:
4405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input against a literal string.
4407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Strings require two slots in the compiled pattern, one for the
4408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   offset to the string text, and one for the length.
4409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t   stringStartIdx = opValue;
4410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t   stringLen;
4411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
441250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                op      = (int32_t)pat[fp->fPatIdx];     // Fetch the second operand
4413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
4414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                opType    = URX_TYPE(op);
4415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                stringLen = URX_VAL(op);
4416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opType == URX_STRING_LEN);
4417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(stringLen >= 2);
4418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pInp = inputBuf + fp->fInputIdx;
4420103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                const UChar * pInpLimit = inputBuf + fActiveLimit;
4421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pPat = litText+stringStartIdx;
4422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UChar * pEnd = pInp + stringLen;
4423103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool success = TRUE;
4424103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                while (pInp < pEnd) {
4425103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (pInp >= pInpLimit) {
4426103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fHitEnd = TRUE;
4427103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
4428103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
4429103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
4430103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (*pInp++ != *pPat++) {
4431103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
4432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
4433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
443650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (success) {
443750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx += stringLen;
443850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
443950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
444050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STATE_SAVE:
4446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = StateSave(fp, opValue, status);
4447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_END:
4451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The match loop will exit via this path on a successful match,
4452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   when we reach the end of the pattern.
4453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (toEnd && fp->fInputIdx != fActiveLimit) {
4454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // The pattern matched, but not to the end of input.  Try some more.
4455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                break;
4457c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isMatch = TRUE;
4459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto  breakFromLoop;
4460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
446150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Start and End Capture stack frame variables are laid out out like this:
4462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //  fp->fExtra[opValue]  - The start of a completed capture group
4463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //             opValue+1 - The end   of a completed capture group
4464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //             opValue+2 - the start of a capture group whose end
4465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                          has not yet been reached (and might not ever be).
4466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_START_CAPTURE:
4467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
4468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue+2] = fp->fInputIdx;
4469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_END_CAPTURE:
4473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fFrameSize-3);
4474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
4475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
4476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
4477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
4478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOLLAR:                   //  $, test for End of line
448250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //     or for position before new line at end of input
4483c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fAnchorLimit-2) {
4484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We are no where near the end of input.  Fail.
4485c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   This is the common case.  Keep it first.
4486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx >= fAnchorLimit) {
4490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We really are at the end of input.  Success.
4491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
4492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fRequireEnd = TRUE;
4493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
4494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we are positioned just before a new-line that is located at the
4497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   end of input, succeed.
4498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx == fAnchorLimit-1) {
449950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
450050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_GET(inputBuf, fAnchorStart, fp->fInputIdx, fAnchorLimit, c);
4501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
45021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
4503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
4504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // At new-line at end of input. Success
4505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
4506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fRequireEnd = TRUE;
4507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
4508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
451050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if (fp->fInputIdx == fAnchorLimit-2 &&
451150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                inputBuf[fp->fInputIdx]==0x0d && inputBuf[fp->fInputIdx+1]==0x0a) {
4512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fRequireEnd = TRUE;
4514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;                         // At CR/LF at end of input.  Success
4515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4519c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            break;
4520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
452250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_D:                   //  $, test for End of Line, in UNIX_LINES mode.
4523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx >= fAnchorLimit-1) {
4524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Either at the last character of input, or off the end.
4525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx == fAnchorLimit-1) {
4526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // At last char of input.  Success if it's a new line.
452750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (inputBuf[fp->fInputIdx] == 0x0a) {
4528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
4529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fRequireEnd = TRUE;
4530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
4531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    }
4532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                } else {
4533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Off the end of input.  Success.
4534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fRequireEnd = TRUE;
4536c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
4537c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
4538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Not at end of input.  Back-track out.
4541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
454550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_M:                //  $, test for End of line in multi-line mode
454650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
454750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
454850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
454950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
455050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;
455150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
455250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
455350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are positioned just before a new-line, succeed.
455450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It makes no difference where the new-line is within the input.
455550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c = inputBuf[fp->fInputIdx];
45561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
455750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // At a line end, except for the odd chance of  being in the middle of a CR/LF sequence
455850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  In multi-line mode, hitting a new-line just before the end of input does not
455950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //   set the hitEnd or requireEnd flags
456050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if ( !(c==0x0a && fp->fInputIdx>fAnchorStart && inputBuf[fp->fInputIdx-1]==0x0d)) {
4561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        break;
456250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
456350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
456450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // not at a new line.  Fail.
456550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
456650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
456750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
457050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_DOLLAR_MD:                //  $, test for End of line in multi-line and UNIX_LINES mode
457150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
457250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fAnchorLimit) {
457350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We really are at the end of input.  Success.
457450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
457550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fRequireEnd = TRUE;  // Java set requireEnd in this case, even though
457650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;               //   adding a new-line would not lose the match.
457750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
457850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // If we are not positioned just before a new-line, the test fails; backtrack out.
457950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // It makes no difference where the new-line is within the input.
458050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (inputBuf[fp->fInputIdx] != 0x0a) {
458150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
458250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
458350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
458450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
458750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET:                    //  ^, test for start of line
4588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx != fAnchorStart) {
4589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
459450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET_M:                   //  ^, test for start of line in multi-line mode
459550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
459650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx == fAnchorStart) {
459750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We are at the start of input.  Success.
459850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
459950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
460050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether character just before the current pos is a new-line
460150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                //   unless we are at the end of input
4602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar  c = inputBuf[fp->fInputIdx - 1];
4603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if ((fp->fInputIdx < fAnchorLimit) &&
46041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    isLineTerminator(c)) {
460550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  It's a new-line.  ^ is true.  Success.
460650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    //  TODO:  what should be done with positions between a CR and LF?
460750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
460850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
460950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Not at the start of a line.  Fail.
461050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
461150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
461250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
461550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        case URX_CARET_M_UNIX:       //  ^, test for start of line in multi-line + Unix-line mode
461650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
461750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx >= fAnchorStart);
461850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx <= fAnchorStart) {
461950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // We are at the start of input.  Success.
462050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
462150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
462250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // Check whether character just before the current pos is a new-line
462350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(fp->fInputIdx <= fAnchorLimit);
4624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar  c = inputBuf[fp->fInputIdx - 1];
462550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c != 0x0a) {
462650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    // Not at the start of a line.  Back-track out.
462750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
462850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
462950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
463050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
4631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_B:          // Test for word boundaries
4633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
463450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UBool success = isChunkWordBoundary((int32_t)fp->fInputIdx);
463554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);     // flip sense for \B
4636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
4637c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_BU:          // Test for word boundaries, Unicode-style
4644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool success = isUWordBoundary(fp->fInputIdx);
464654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);     // flip sense for \B
4647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
4648c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_D:            // Test for decimal digit
4655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4656c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4657c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
466250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
466350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                int8_t ctype = u_charType(c);     // TODO:  make a unicode set for this.  Will be faster.
4665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UBool success = (ctype == U_DECIMAL_DIGIT_NUMBER);
466654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                success ^= (UBool)(opValue != 0);        // flip sense for \D
466750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (!success) {
4668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKSLASH_G:          // Test for position at end of previous match
467527f654740f2a26ad62a5c155af9199af9e69b889claireho            if (!((fMatch && fp->fInputIdx==fMatchEnd) || (fMatch==FALSE && fp->fInputIdx==fActiveStart))) {
4676c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
46811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_H:            // Test for \h, horizontal white space.
46821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
46831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
46841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
46851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
46871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
46881b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c;
46891b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
46901b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                int8_t ctype = u_charType(c);
46911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UBool success = (ctype == U_SPACE_SEPARATOR || c == 9);  // SPACE_SEPARATOR || TAB
46921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                success ^= (UBool)(opValue != 0);        // flip sense for \H
46931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (!success) {
46941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
46951b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
46961b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
46971b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
46981b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
46991b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
47001b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_R:            // Test for \R, any line break sequence.
47011b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
47021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
47031b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
47041b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
47061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
47071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c;
47081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
47101b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    if (c == 0x0d && fp->fInputIdx < fActiveLimit) {
47111b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        // Check for CR/LF sequence. Consume both together when found.
47121b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        UChar c2;
47131b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c2);
47141b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        if (c2 != 0x0a) {
47151b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                            U16_PREV(inputBuf, 0, fp->fInputIdx, c2);
47161b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        }
47171b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    }
47181b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                } else {
47191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
47211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
47221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
47231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
47241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
47251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        case URX_BACKSLASH_V:         // Any single code point line ending.
47261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            {
47271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (fp->fInputIdx >= fActiveLimit) {
47281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fHitEnd = TRUE;
47291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    break;
47311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
47321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UChar32 c;
47331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
47341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                UBool success = isLineTerminator(c);
47351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                success ^= (UBool)(opValue != 0);        // flip sense for \V
47361b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (!success) {
47371b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
47381b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                }
47391b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            }
47401b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            break;
47411b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
47421b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
47431b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
4744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case URX_BACKSLASH_X:
474550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Match a Grapheme, as defined by Unicode TR 29.
474650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //  Differs slightly from Perl, which consumes combining marks independently
474750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        //    of context.
474850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        {
4749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Fail if at end of input
475150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
475250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
475350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
475450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
475550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
4756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Examine (and consume) the current char.
475850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   Dispatch into a little state machine, based on the char.
475950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UChar32  c;
476050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
476150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            UnicodeSet **sets = fPattern->fStaticSets;
476250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_NORMAL]->contains(c))  goto GC_Extend;
476350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_CONTROL]->contains(c)) goto GC_Control;
476450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_L]->contains(c))       goto GC_L;
476550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
476650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
476750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
476850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
476950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_L:
477450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
477550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
477650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_L]->contains(c))       goto GC_L;
477750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LV]->contains(c))      goto GC_V;
477850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_LVT]->contains(c))     goto GC_T;
477950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
478050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
478150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_V:
478450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
478550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
478650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_V]->contains(c))       goto GC_V;
478750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
478850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
478950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_T:
479250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit)         goto GC_Done;
479350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
479450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (sets[URX_GC_T]->contains(c))       goto GC_T;
479550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            U16_PREV(inputBuf, 0, fp->fInputIdx, c);
479650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Extend;
4797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Extend:
479950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // Combining characters are consumed here
480050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            for (;;) {
480150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
480250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
4803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
480450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
480550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (sets[URX_GC_EXTEND]->contains(c) == FALSE) {
480650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U16_BACK_1(inputBuf, 0, fp->fInputIdx);
480750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
480850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
480950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
481050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            goto GC_Done;
4811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Control:
4813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Most control chars stand alone (don't combine with combining chars),
481450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            //   except that a CR/LF sequence is a single grapheme cluster.
481550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (c == 0x0d && fp->fInputIdx < fActiveLimit && inputBuf[fp->fInputIdx] == 0x0a) {
481650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp->fInputIdx++;
481750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
4818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
4819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruGC_Done:
482050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if (fp->fInputIdx >= fActiveLimit) {
482150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fHitEnd = TRUE;
4822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
482350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
482450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
4825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4829c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case URX_BACKSLASH_Z:          // Test for end of Input
4830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fAnchorLimit) {
4831c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4832c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
4833c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
4834c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fRequireEnd = TRUE;
4835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STATIC_SETREF:
4841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Test input character against one of the predefined sets
4843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    (Word Characters, for example)
4844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // The high bit of the op value is a flag for the match polarity.
4845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    0:   success if input char is in set.
4846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    1:   success if input char is not in set.
4847c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4848c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4849c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UBool success = ((opValue & URX_NEG_SET) == URX_NEG_SET);
4854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                opValue &= ~URX_NEG_SET;
4855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
4856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
485750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 c;
4858c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c < 256) {
4860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
4861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s8->contains(c)) {
4862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        success = !success;
4863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
4865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
4866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s->contains(c)) {
4867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        success = !success;
4868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!success) {
4871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STAT_SETREF_N:
4878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4879fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Test input character for NOT being a member of  one of
4880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    the predefined sets (Word Characters, for example)
4881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4883c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4886fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < URX_LAST_SET);
4888fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar32  c;
4890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c < 256) {
4892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Regex8BitSet *s8 = &fPattern->fStaticSets8[opValue];
4893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s8->contains(c) == FALSE) {
4894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
4895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
4897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const UnicodeSet *s = fPattern->fStaticSets[opValue];
4898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (s->contains(c) == FALSE) {
4899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
4900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_SETREF:
490850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            {
490950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (fp->fInputIdx >= fActiveLimit) {
491050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fHitEnd = TRUE;
491150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    break;
4913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
491550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(opValue > 0 && opValue < sets->size());
491650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
491750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // There is input left.  Pick up one char and test it for set membership.
491850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
491950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
492050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (c<256) {
492150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    Regex8BitSet *s8 = &fPattern->fSets8[opValue];
492250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s8->contains(c)) {
492350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
492450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
492550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
492650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
492750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue);
492850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (s->contains(c)) {
492950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        // The character is in the set.  A Match.
493050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
493150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
493250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
4933fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
493450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // the character wasn't in the set.
493550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4936c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
4937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOTANY:
4941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // . matches anything, but stops at end-of-line.
4943c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
4945c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4946c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4949fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // There is input left.  Advance over one char, unless we've hit end-of-line
495150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32  c;
4952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
49531b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                if (isLineTerminator(c)) {
4954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // End of line in normal mode.   . does not match.
495550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4960fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4961fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_DOTANY_ALL:
4963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
496450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                // . in dot-matches-all (including new lines) mode
4965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
4967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // There is input left.  Advance over one char, except if we are
4973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   at a cr/lf, advance over both of them.
4974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar32 c;
4975c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
4976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c==0x0d && fp->fInputIdx < fActiveLimit) {
4977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // In the case of a CR/LF, we need to advance over both.
497850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if (inputBuf[fp->fInputIdx] == 0x0a) {
497950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_FWD_1(inputBuf, fp->fInputIdx, fActiveLimit);
4980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
4981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
4983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
4984fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4985fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4986c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        case URX_DOTANY_UNIX:
4987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
4988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // '.' operator, matches all, but stops at end-of-line.
4989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   UNIX_LINES mode, so 0x0a is the only recognized line ending.
4990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx >= fActiveLimit) {
4991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // At end of input.  Match failed.  Backtrack out.
4992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
4993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
4994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
4995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
4996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // There is input left.  Advance over one char, unless we've hit end-of-line
4998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar32 c;
4999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (c == 0x0a) {
5001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // End of line in UNIX_LINES mode.   '.' does not match the \n
5002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP:
5009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fp->fPatIdx = opValue;
5010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_FAIL:
5013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isMatch = FALSE;
5014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto breakFromLoop;
5015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP_SAV:
5017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(opValue < fPattern->fCompiledPat->size());
5018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = StateSave(fp, fp->fPatIdx, status);       // State save to loc following current
5019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp->fPatIdx = opValue;                         // Then JMP.
5020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5021fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMP_SAV_X:
5023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This opcode is used with (x)+, when x can match a zero length string.
5024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Same as JMP_SAV, except conditional on the match having made forward progress.
5025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the
5026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   data address of the input position at the start of the loop.
5027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < fPattern->fCompiledPat->size());
502950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t  stoOp = (int32_t)pat[opValue-1];
5030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(stoOp) == URX_STO_INP_LOC);
5031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t  frameLoc = URX_VAL(stoOp);
5032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(frameLoc >= 0 && frameLoc < fFrameSize);
503350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t prevInputIdx = (int32_t)fp->fExtra[frameLoc];
5034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(prevInputIdx <= fp->fInputIdx);
5035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (prevInputIdx < fp->fInputIdx) {
5036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // The match did make progress.  Repeat the loop.
5037c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, fp->fPatIdx, status);  // State save to loc following current
5038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue;
5039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fExtra[frameLoc] = fp->fInputIdx;
5040fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
5041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If the input position did not advance, we do nothing here,
5042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   execution will fall out of the loop.
5043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5045fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_INIT:
5047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5048c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
504959d709d503bab6e2b61931737e662dd293b40578ccornelius                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
5050fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Pick up the three extra operands that CTR_INIT has, and
5052fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //    skip the pattern location counter past them.
505350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 3;
5055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
505650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
505750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
5058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minCount>=0);
5059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(maxCount>=minCount || maxCount==-1);
506059d709d503bab6e2b61931737e662dd293b40578ccornelius                U_ASSERT(loopLoc>=fp->fPatIdx);
5061fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (minCount == 0) {
5063c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, loopLoc+1, status);
5064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
506559d709d503bab6e2b61931737e662dd293b40578ccornelius                if (maxCount == -1) {
506659d709d503bab6e2b61931737e662dd293b40578ccornelius                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  For loop breaking.
506759d709d503bab6e2b61931737e662dd293b40578ccornelius                } else if (maxCount == 0) {
5068c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5072fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_LOOP:
5074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
507650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
5077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT);
507850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
507950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
508050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
5081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (*pCounter)++;
508259d709d503bab6e2b61931737e662dd293b40578ccornelius                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
508359d709d503bab6e2b61931737e662dd293b40578ccornelius                    U_ASSERT(*pCounter == maxCount);
5084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*pCounter >= minCount) {
508759d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (maxCount == -1) {
508859d709d503bab6e2b61931737e662dd293b40578ccornelius                        // Loop has no hard upper bound.
508959d709d503bab6e2b61931737e662dd293b40578ccornelius                        // Check that it is progressing through the input, break if it is not.
509059d709d503bab6e2b61931737e662dd293b40578ccornelius                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
509159d709d503bab6e2b61931737e662dd293b40578ccornelius                        if (fp->fInputIdx == *pLastInputIdx) {
509259d709d503bab6e2b61931737e662dd293b40578ccornelius                            break;
509359d709d503bab6e2b61931737e662dd293b40578ccornelius                        } else {
509459d709d503bab6e2b61931737e662dd293b40578ccornelius                            *pLastInputIdx = fp->fInputIdx;
509559d709d503bab6e2b61931737e662dd293b40578ccornelius                        }
509659d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
5097c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, fp->fPatIdx, status);
5098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx = opValue + 4;    // Loop back.
5100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_INIT_NG:
5104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Initialize a non-greedy loop
5106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize-2);
510759d709d503bab6e2b61931737e662dd293b40578ccornelius                fp->fExtra[opValue] = 0;                 //  Set the loop counter variable to zero
5108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
510959d709d503bab6e2b61931737e662dd293b40578ccornelius                // Pick up the three extra operands that CTR_INIT_NG has, and
5110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //    skip the pattern location counter past them.
511150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 3;
5113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t loopLoc  = URX_VAL(pat[instrOperandLoc]);
511450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount = (int32_t)pat[instrOperandLoc+1];
511550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount = (int32_t)pat[instrOperandLoc+2];
5116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minCount>=0);
5117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(maxCount>=minCount || maxCount==-1);
5118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(loopLoc>fp->fPatIdx);
511959d709d503bab6e2b61931737e662dd293b40578ccornelius                if (maxCount == -1) {
512059d709d503bab6e2b61931737e662dd293b40578ccornelius                    fp->fExtra[opValue+1] = fp->fInputIdx;   //  Save initial input index for loop breaking.
512159d709d503bab6e2b61931737e662dd293b40578ccornelius                }
5122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (minCount == 0) {
5124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (maxCount != 0) {
5125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fp = StateSave(fp, fp->fPatIdx, status);
5126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = loopLoc+1;   // Continue with stuff after repeated block
5128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
5129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_CTR_LOOP_NG:
5133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Non-greedy {min, max} loops
5135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>0 && opValue < fp->fPatIdx-2);
513650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t initOp = (int32_t)pat[opValue];
5137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(initOp) == URX_CTR_INIT_NG);
513850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *pCounter = &fp->fExtra[URX_VAL(initOp)];
513950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minCount  = (int32_t)pat[opValue+2];
514050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxCount  = (int32_t)pat[opValue+3];
514159d709d503bab6e2b61931737e662dd293b40578ccornelius
5142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (*pCounter)++;
514359d709d503bab6e2b61931737e662dd293b40578ccornelius                if ((uint64_t)*pCounter >= (uint32_t)maxCount && maxCount != -1) {
5144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // The loop has matched the maximum permitted number of times.
5145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Break out of here with no action.  Matching will
5146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   continue with the following pattern.
514759d709d503bab6e2b61931737e662dd293b40578ccornelius                    U_ASSERT(*pCounter == maxCount);
5148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*pCounter < minCount) {
5152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We haven't met the minimum number of matches yet.
5153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Loop back for another one.
5154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue + 4;    // Loop back.
5155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We do have the minimum number of matches.
515759d709d503bab6e2b61931737e662dd293b40578ccornelius
515859d709d503bab6e2b61931737e662dd293b40578ccornelius                    // If there is no upper bound on the loop iterations, check that the input index
515959d709d503bab6e2b61931737e662dd293b40578ccornelius                    // is progressing, and stop the loop if it is not.
516059d709d503bab6e2b61931737e662dd293b40578ccornelius                    if (maxCount == -1) {
516159d709d503bab6e2b61931737e662dd293b40578ccornelius                        int64_t *pLastInputIdx =  &fp->fExtra[URX_VAL(initOp) + 1];
516259d709d503bab6e2b61931737e662dd293b40578ccornelius                        if (fp->fInputIdx == *pLastInputIdx) {
516359d709d503bab6e2b61931737e662dd293b40578ccornelius                            break;
516459d709d503bab6e2b61931737e662dd293b40578ccornelius                        }
516559d709d503bab6e2b61931737e662dd293b40578ccornelius                        *pLastInputIdx = fp->fInputIdx;
516659d709d503bab6e2b61931737e662dd293b40578ccornelius                    }
516759d709d503bab6e2b61931737e662dd293b40578ccornelius
516859d709d503bab6e2b61931737e662dd293b40578ccornelius                    // Loop Continuation: we will fall into the pattern following the loop
516959d709d503bab6e2b61931737e662dd293b40578ccornelius                    //   (non-greedy, don't execute loop body first), but first do
517059d709d503bab6e2b61931737e662dd293b40578ccornelius                    //   a state save to the top of the loop, so that a match failure
5171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   in the following pattern will try another iteration of the loop.
5172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = StateSave(fp, opValue + 4, status);
5173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STO_SP:
5178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
5179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fData[opValue] = fStack->size();
5180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LD_SP:
5183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fPattern->fDataSize);
518550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(newStackSize <= fStack->size());
518750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
518850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (newFP == (int64_t *)fp) {
5189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t i;
5192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                for (i=0; i<fFrameSize; i++) {
519350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    newFP[i] = ((int64_t *)fp)[i];
5194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp = (REStackFrame *)newFP;
5196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fStack->setSize(newStackSize);
5197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKREF:
5201103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            {
5202103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(opValue < fFrameSize);
5203103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int64_t groupStartIdx = fp->fExtra[opValue];
5204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int64_t groupEndIdx   = fp->fExtra[opValue+1];
5205103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(groupStartIdx <= groupEndIdx);
5206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int64_t inputIndex = fp->fInputIdx;
5207103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (groupStartIdx < 0) {
5208103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    // This capture group has not participated in the match thus far,
5209103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
5210103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    break;
5211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool success = TRUE;
5213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                for (int64_t groupIndex = groupStartIdx; groupIndex < groupEndIdx; ++groupIndex,++inputIndex) {
5214103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputIndex >= fActiveLimit) {
5215103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
5216103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fHitEnd = TRUE;
5217103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
5218103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
5219103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputBuf[groupIndex] != inputBuf[inputIndex]) {
5220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
5221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
5222103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
5223103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5224103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success) {
5225103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp->fInputIdx = inputIndex;
5226103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                } else {
5227103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5228103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5229103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            }
5230103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            break;
5231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_BACKREF_I:
5233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5234c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue < fFrameSize);
523550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupStartIdx = fp->fExtra[opValue];
523650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t groupEndIdx   = fp->fExtra[opValue+1];
5237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(groupStartIdx <= groupEndIdx);
5238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (groupStartIdx < 0) {
5239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // This capture group has not participated in the match thus far,
5240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no match.
5241103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    break;
5242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5243103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                CaseFoldingUCharIterator captureGroupItr(inputBuf, groupStartIdx, groupEndIdx);
5244103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                CaseFoldingUCharIterator inputItr(inputBuf, fp->fInputIdx, fActiveLimit);
5245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5246103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //   Note: if the capture group match was of an empty string the backref
5247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //         match succeeds.  Verified by testing:  Perl matches succeed
5248103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                //         in this case, so we do too.
5249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5250103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool success = TRUE;
5251103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                for (;;) {
5252103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 captureGroupChar = captureGroupItr.next();
5253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (captureGroupChar == U_SENTINEL) {
5254103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = TRUE;
5255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5257103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    UChar32 inputChar = inputItr.next();
5258103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputChar == U_SENTINEL) {
5259103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
5260103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        fHitEnd = TRUE;
5261103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
5262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5263103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (inputChar != captureGroupChar) {
5264103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
5265103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
5266103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    }
5267103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5268103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5269103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success && inputItr.inExpansion()) {
5270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // We obtained a match by consuming part of a string obtained from
5271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // case-folding a single code point of the input text.
5272103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    // This does not count as an overall match.
5273103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    success = FALSE;
5274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5275103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5276103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success) {
5277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp->fInputIdx = inputItr.getIndex();
5278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5283103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STO_INP_LOC:
5285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue >= 0 && opValue < fFrameSize);
5287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[opValue] = fp->fInputIdx;
5288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_JMPX:
5292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
529350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t instrOperandLoc = (int32_t)fp->fPatIdx;
5294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx += 1;
5295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t dataLoc  = URX_VAL(pat[instrOperandLoc]);
5296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(dataLoc >= 0 && dataLoc < fFrameSize);
529750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t savedInputIdx = (int32_t)fp->fExtra[dataLoc];
5298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(savedInputIdx <= fp->fInputIdx);
5299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (savedInputIdx < fp->fInputIdx) {
5300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = opValue;                               // JMP
5301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
530250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);   // FAIL, no progress in loop.
5303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LA_START:
5308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Entering a lookahead block.
5310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save Stack Ptr, Input Pos.
5311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue]   = fStack->size();
5313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+1] = fp->fInputIdx;
5314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveStart     = fLookStart;          // Set the match region change for
5315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit     = fLookLimit;          //   transparent bounds.
5316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LA_END:
5320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Leaving a look-ahead block.
5322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  restore Stack Ptr, Input Pos to positions they had on entry to block.
5323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackSize = fStack->size();
532550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(stackSize >= newStackSize);
5327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (stackSize > newStackSize) {
5328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Copy the current top frame back to the new (cut back) top frame.
5329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   This makes the capture groups from within the look-ahead
5330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    //   expression available.
533150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize;
5332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t i;
5333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    for (i=0; i<fFrameSize; i++) {
533450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        newFP[i] = ((int64_t *)fp)[i];
5335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp = (REStackFrame *)newFP;
5337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fStack->setSize(newStackSize);
5338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = fData[opValue+1];
5340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // Restore the active region bounds in the input string; they may have
5342c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //    been changed because of transparent bounds on a Region.
5343c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveStart = fRegionStart;
5344c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = fRegionLimit;
5345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_ONECHAR_I:
5349c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (fp->fInputIdx < fActiveLimit) {
5350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar32 c;
5351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U16_NEXT(inputBuf, fp->fInputIdx, fActiveLimit, c);
5352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (u_foldCase(c, U_FOLD_CASE_DEFAULT) == opValue) {
5353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            } else {
5356c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fHitEnd = TRUE;
5357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
5358c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_STRING_I:
5362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Case-insensitive test input against a literal string.
5363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            // Strings require two slots in the compiled pattern, one for the
5364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //   offset to the string text, and one for the length.
5365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            //   The compiled string has already been case folded.
5366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                const UChar *patternString = litText + opValue;
5368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                op      = (int32_t)pat[fp->fPatIdx];
5370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                fp->fPatIdx++;
5371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                opType  = URX_TYPE(op);
5372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                opValue = URX_VAL(op);
5373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                U_ASSERT(opType == URX_STRING_LEN);
5374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int32_t patternStringLen = opValue;  // Length of the string from the pattern.
5375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UChar32      cText;
5377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UChar32      cPattern;
5378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                UBool        success = TRUE;
5379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                int32_t      patternStringIdx  = 0;
5380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                CaseFoldingUCharIterator inputIterator(inputBuf, fp->fInputIdx, fActiveLimit);
5381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                while (patternStringIdx < patternStringLen) {
5382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    U16_NEXT(patternString, patternStringIdx, patternStringLen, cPattern);
5383103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    cText = inputIterator.next();
5384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if (cText != cPattern) {
5385103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        success = FALSE;
5386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        if (cText == U_SENTINEL) {
5387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                            fHitEnd = TRUE;
538850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        }
5389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        break;
5390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                }
5392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (inputIterator.inExpansion()) {
5393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    success = FALSE;
5394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if (success) {
5397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp->fInputIdx = inputIterator.getIndex();
5398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                } else {
5399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                }
5401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5403103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
5404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_START:
5405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Entering a look-behind block.
5407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save Stack Ptr, Input Pos.
5408c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   TODO:  implement transparent bounds.  Ticket #6067
5409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue]   = fStack->size();
5411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+1] = fp->fInputIdx;
5412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Init the variable containing the start index for attempted matches.
5413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fData[opValue+2] = -1;
5414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save input string length, then reset to pin any matches to end at
5415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   the current position.
5416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fData[opValue+3] = fActiveLimit;
5417c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit     = fp->fInputIdx;
5418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_CONT:
5423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Positive Look-Behind, at top of loop checking for matches of LB expression
5425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    at all possible input starting positions.
5426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch the min and max possible match lengths.  They are the operands
5428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   of this op in the pattern.
542950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML = (int32_t)pat[fp->fPatIdx++];
543050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML = (int32_t)pat[fp->fPatIdx++];
5431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML <= maxML);
5432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML >= 0);
5433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch (from data) the last input index where a match was attempted.
5435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
543650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
5437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0) {
5438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // First time through loop.
5439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *lbStartIdx = fp->fInputIdx - minML;
5440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 2nd through nth time through the loop.
5442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Back up start position for match by one.
5443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*lbStartIdx == 0) {
544450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (*lbStartIdx)--;
5445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
5447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
5451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have tried all potential match starting points without
5452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  getting a match.  Backtrack out, and out of the
5453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //   Look Behind altogether.
5454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
545550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
5456c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U_ASSERT(restoreInputLen >= fActiveLimit);
545750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
5458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fActiveLimit = restoreInputLen;
5459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    Save state to this URX_LB_CONT op, so failure to match will repeat the loop.
5463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //      (successful match will fall off the end of the loop.)
5464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-3, status);
5465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx =  *lbStartIdx;
5466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LB_END:
5470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // End of a look-behind block, after a successful match.
5471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx != fActiveLimit) {
5474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  The look-behind expression matched, but the match did not
5475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //    extend all the way to the point that we are looking behind from.
5476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  FAIL out of here, which will take us back to the LB_CONT, which
5477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     will retry the match starting at another position or fail
5478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     the look-behind altogether, whichever is appropriate.
5479c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Look-behind match is good.  Restore the original input string length,
5484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   which had been truncated to pin the end of the lookbehind match to the
5485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   position being looked-behind.
548650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
5487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(originalInputLen >= fActiveLimit);
548850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
5489c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = originalInputLen;
5490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LBN_CONT:
5495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Negative Look-Behind, at top of loop checking for matches of LB expression
5497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    at all possible input starting positions.
5498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch the extra parameters of this op.
550050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t minML       = (int32_t)pat[fp->fPatIdx++];
550150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t maxML       = (int32_t)pat[fp->fPatIdx++];
550250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t continueLoc = (int32_t)pat[fp->fPatIdx++];
550350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                continueLoc = URX_VAL(continueLoc);
5504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML <= maxML);
5505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(minML >= 0);
5506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(continueLoc > fp->fPatIdx);
5507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Fetch (from data) the last input index where a match was attempted.
5509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
551050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t  *lbStartIdx = &fData[opValue+2];
5511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0) {
5512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // First time through loop.
5513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *lbStartIdx = fp->fInputIdx - minML;
5514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 2nd through nth time through the loop.
5516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Back up start position for match by one.
5517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*lbStartIdx == 0) {
5518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (*lbStartIdx)--;   // Because U16_BACK is unsafe starting at 0.
5519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_BACK_1(inputBuf, 0, *lbStartIdx);
5521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (*lbStartIdx < 0 || *lbStartIdx < fp->fInputIdx - maxML) {
5525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have tried all potential match starting points without
5526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  getting a match, which means that the negative lookbehind as
5527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  a whole has succeeded.  Jump forward to the continue location
552850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int64_t restoreInputLen = fData[opValue+3];
5529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U_ASSERT(restoreInputLen >= fActiveLimit);
553050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    U_ASSERT(restoreInputLen <= fInputLength);
5531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fActiveLimit = restoreInputLen;
5532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx = continueLoc;
5533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    Save state to this URX_LBN_CONT op, so failure to match will repeat the loop.
5537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //      (successful match will cause a FAIL out of the loop altogether.)
5538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-4, status);
5539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx =  *lbStartIdx;
5540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LBN_END:
5544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // End of a negative look-behind block, after a successful match.
5545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
5547c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if (fp->fInputIdx != fActiveLimit) {
5548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  The look-behind expression matched, but the match did not
5549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //    extend all the way to the point that we are looking behind from.
5550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  FAIL out of here, which will take us back to the LBN_CONT, which
5551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     will retry the match starting at another position or succeed
5552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //     the look-behind altogether, whichever is appropriate.
5553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Look-behind expression matched, which means look-behind test as
5558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   a whole Fails
5559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   Restore the original input string length, which had been truncated
5561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //   in order to pin the end of the lookbehind match
5562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   to the position being looked-behind.
556350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int64_t originalInputLen = fData[opValue+3];
5564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(originalInputLen >= fActiveLimit);
556550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(originalInputLen <= fInputLength);
5566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fActiveLimit = originalInputLen;
5567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Restore original stack position, discarding any state saved
5569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   by the successful pattern match.
5570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue+1<fPattern->fDataSize);
557150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t newStackSize = (int32_t)fData[opValue];
5572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(fStack->size() > newStackSize);
5573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fStack->setSize(newStackSize);
5574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //  FAIL, which will take control back to someplace
5576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //  prior to entering the look-behind test.
5577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = (REStackFrame *)fStack->popFrame(fFrameSize);
5578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_SR_I:
5583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Loop Initialization for the optimized implementation of
5584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //     [some character set]*
5585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   This op scans through all matching input.
5586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
5587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(opValue > 0 && opValue < sets->size());
5589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                Regex8BitSet *s8 = &fPattern->fSets8[opValue];
5590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeSet   *s  = (UnicodeSet *)sets->elementAt(opValue);
5591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Loop through input, until either the input is exhausted or
5593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   we reach a character that is not a member of the set.
559450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t ix = (int32_t)fp->fInputIdx;
5595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (;;) {
5596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    if (ix >= fActiveLimit) {
5597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        fHitEnd = TRUE;
5598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
5599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar32   c;
5601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    U16_NEXT(inputBuf, ix, fActiveLimit, c);
5602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (c<256) {
5603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (s8->contains(c) == FALSE) {
5604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            U16_BACK_1(inputBuf, 0, ix);
5605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
5608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (s->contains(c) == FALSE) {
5609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            U16_BACK_1(inputBuf, 0, ix);
5610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If there were no matching characters, skip over the loop altogether.
5616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   The loop doesn't run at all, a * op always succeeds.
5617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (ix == fp->fInputIdx) {
5618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
5619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
5623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   must follow.  Its operand is the stack location
5624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   that holds the starting input index for the match of this [set]*
562550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
5626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
5627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackLoc = URX_VAL(loopcOp);
5628c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
5629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[stackLoc] = fp->fInputIdx;
5630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = ix;
5631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save State to the URX_LOOP_C op that follows this one,
5633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   so that match failures in the following code will return to there.
5634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
5635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx, status);
5636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
5637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_DOT_I:
5642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Loop Initialization for the optimized implementation of .*
5643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   This op scans through all remaining input.
5644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //   The following LOOP_C op emulates stack unwinding if the following pattern fails.
5645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Loop through input until the input is exhausted (we reach an end-of-line)
5647c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                // In DOTALL mode, we can just go straight to the end of the input.
5648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t ix;
5649c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                if ((opValue & 1) == 1) {
5650c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // Dot-matches-All mode.  Jump straight to the end of the string.
565150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = (int32_t)fActiveLimit;
5652c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    fHitEnd = TRUE;
5653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
5654c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    // NOT DOT ALL mode.  Line endings do not match '.'
5655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Scan forward until a line ending or end of input.
565650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ix = (int32_t)fp->fInputIdx;
5657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (;;) {
5658c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        if (ix >= fActiveLimit) {
5659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            fHitEnd = TRUE;
5660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
5661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UChar32   c;
5663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                        U16_NEXT(inputBuf, ix, fActiveLimit, c);   // c = inputBuf[ix++]
566427f654740f2a26ad62a5c155af9199af9e69b889claireho                        if ((c & 0x7f) <= 0x29) {          // Fast filter of non-new-line-s
566527f654740f2a26ad62a5c155af9199af9e69b889claireho                            if ((c == 0x0a) ||             //  0x0a is newline in both modes.
566627f654740f2a26ad62a5c155af9199af9e69b889claireho                                (((opValue & 2) == 0) &&    // IF not UNIX_LINES mode
56671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                   isLineTerminator(c))) {
5668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                //  char is a line ending.  Put the input pos back to the
5669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                //    line ending char, and exit the scanning loop.
5670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                U16_BACK_1(inputBuf, 0, ix);
5671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                                break;
5672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                            }
5673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
5674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If there were no matching characters, skip over the loop altogether.
5678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   The loop doesn't run at all, a * op always succeeds.
5679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (ix == fp->fInputIdx) {
5680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fp->fPatIdx++;   // skip the URX_LOOP_C op.
5681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Peek ahead in the compiled pattern, to the URX_LOOP_C that
5685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   must follow.  Its operand is the stack location
5686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                //   that holds the starting input index for the match of this .*
568750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                int32_t loopcOp = (int32_t)pat[fp->fPatIdx];
5688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(URX_TYPE(loopcOp) == URX_LOOP_C);
5689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t stackLoc = URX_VAL(loopcOp);
5690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(stackLoc >= 0 && stackLoc < fFrameSize);
5691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fExtra[stackLoc] = fp->fInputIdx;
5692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fInputIdx = ix;
5693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Save State to the URX_LOOP_C op that follows this one,
5695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   so that match failures in the following code will return to there.
5696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   Then bump the pattern idx so the LOOP_C is skipped on the way out of here.
5697c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx, status);
5698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fp->fPatIdx++;
5699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case URX_LOOP_C:
5704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            {
5705c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                U_ASSERT(opValue>=0 && opValue<fFrameSize);
570650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                backSearchIndex = (int32_t)fp->fExtra[opValue];
570750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U_ASSERT(backSearchIndex <= fp->fInputIdx);
570850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if (backSearchIndex == fp->fInputIdx) {
5709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We've backed up the input idx to the point that the loop started.
5710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // The loop is done.  Leave here without saving state.
5711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    //  Subsequent failures won't come back here.
5712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
5713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Set up for the next iteration of the loop, with input index
5715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   backed up by one from the last time through,
5716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   and a state save to this instruction in case the following code fails again.
5717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //   (We're going backwards because this loop emulates stack unwinding, not
5718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    the initial scan forward.)
5719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                U_ASSERT(fp->fInputIdx > 0);
572050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UChar32 prevC;
572150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                U16_PREV(inputBuf, 0, fp->fInputIdx, prevC); // !!!: should this 0 be one of f*Limit?
5722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if (prevC == 0x0a &&
572450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    fp->fInputIdx > backSearchIndex &&
5725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    inputBuf[fp->fInputIdx-1] == 0x0d) {
572650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    int32_t prevOp = (int32_t)pat[fp->fPatIdx-2];
5727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (URX_TYPE(prevOp) == URX_LOOP_DOT_I) {
5728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // .*, stepping back over CRLF pair.
572950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        U16_BACK_1(inputBuf, 0, fp->fInputIdx);
5730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
5731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
5732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5734c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                fp = StateSave(fp, fp->fPatIdx-1, status);
5735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
5736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default:
5741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Trouble.  The compiled pattern contains an entry with an
5742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //           unrecognized type tag.
5743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_ASSERT(FALSE);
5744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(status)) {
5747c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            isMatch = FALSE;
5748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
5749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerubreakFromLoop:
5753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fMatch = isMatch;
5754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isMatch) {
5755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fLastMatchEnd = fMatchEnd;
5756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fMatchStart   = startIdx;
5757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fMatchEnd     = fp->fInputIdx;
5758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifdef REGEX_RUN_DEBUG
5761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (fTraceDebug) {
5762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (isMatch) {
5763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("Match.  start=%ld   end=%ld\n\n", fMatchStart, fMatchEnd);
5764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
5765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            printf("No match\n\n");
5766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
5767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
5769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
5770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fFrame = fp;                // The active stack frame when the engine stopped.
5771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                //   Contains the capture group results that we need to
5772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                //    access later.
5773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return;
5775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
5776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
5777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Class-ID (RTTI) boilerplate for RegexMatcher.
// NOTE(review): the macro is defined outside this file; presumably it expands to the
//   getStaticClassID()/getDynamicClassID() definitions -- confirm against its definition.
5778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexMatcher)
5779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// NOTE(review): presumably pairs with a U_NAMESPACE_BEGIN earlier in this file (not visible here).
5780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
5781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Closes the "#if !UCONFIG_NO_REGULAR_EXPRESSIONS" guard opened right after the
//   unicode/utypes.h include at the top of the file.
5782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
5783